simulstream 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in one of the supported public registries. It is provided for informational purposes only.
docs/source/conf.py CHANGED
@@ -4,6 +4,7 @@
  # https://www.sphinx-doc.org/en/master/usage/configuration.html
  import os
  import sys
+ from simulstream import __version__

  sys.path.insert(0, os.path.abspath('../../'))

@@ -13,7 +14,7 @@ sys.path.insert(0, os.path.abspath('../../'))
  project = 'simulstream'
  copyright = '2025, FBK'
  author = 'Marco Gaido, FBK MT Unit'
- release = '0.1.0'
+ release = __version__

  # -- General configuration ---------------------------------------------------
  # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
simulstream/VERSION.txt ADDED
@@ -0,0 +1 @@
+ 0.2.0
simulstream/__init__.py CHANGED
@@ -12,4 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License

- __version__ = '0.1.0'
+ from pathlib import Path
+
+
+ with Path(__file__).with_name('VERSION.txt').open('r') as f:
+     __version__ = f.read().strip()
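
The version string is now defined once, in the bundled VERSION.txt, and read at import time; docs/source/conf.py then derives its release value from simulstream.__version__. A minimal check of the resulting behavior, assuming simulstream 0.2.0 is installed in the current environment:

import simulstream

# __version__ is read from simulstream/VERSION.txt when the package is imported,
# so for this release it is expected to print "0.2.0".
print(simulstream.__version__)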
simulstream/inference.py CHANGED
@@ -49,8 +49,10 @@ def process_audio(
          sample_rate (int): Audio sample rate (Hz).
          data (np.ndarray): Audio samples as int16 array.
      """
+     # speech_chunk_size is expressed in seconds, so the number of samples corresponding to
+     # one speech chunk is the following
      samples_per_chunk = int(
-         sample_rate * message_processor.speech_processor.speech_chunk_size / 1000.0)
+         sample_rate * message_processor.speech_processor.speech_chunk_size)
      i = 0
      for i in range(0, len(data), samples_per_chunk):
          output = message_processor.process_speech(data[i:i + samples_per_chunk].tobytes())
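
With this change, speech_chunk_size is interpreted in seconds rather than milliseconds, so the per-chunk sample count is simply sample_rate * speech_chunk_size. A quick worked example of the new arithmetic, using made-up values rather than anything taken from the package defaults:

# Hypothetical numbers to illustrate the new chunking arithmetic.
sample_rate = 16_000      # Hz
speech_chunk_size = 0.5   # seconds (previously this would have been 500 ms divided by 1000.0)
samples_per_chunk = int(sample_rate * speech_chunk_size)
assert samples_per_chunk == 8_000  # half a second of 16 kHz audio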
simulstream/server/speech_processors/remote/http_proxy_speech_processor.py ADDED
@@ -0,0 +1,115 @@
+ # Copyright 2026 FBK
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ #     http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License
+
+ import base64
+ import json
+ from http import HTTPStatus
+ from typing import List, Any, Dict, Optional
+ import uuid
+ import urllib.request
+
+ import numpy as np
+
+ from simulstream.server.speech_processors import SpeechProcessor, IncrementalOutput
+
+
+ class HttpProxySpeechProcessor(SpeechProcessor):
+     """
+     HTTP-based proxy implementation of :class:`SpeechProcessor`.
+
+     This class does not perform speech processing locally. Instead, it forwards
+     all method calls to a remote speech processor exposed via HTTP, maintaining
+     a dedicated session on the server side.
+
+     Each instance of this class corresponds to exactly one remote session.
+     """
+
+     @classmethod
+     def load_model(cls, config):
+         pass
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.base_url = f"http://{config.hostname}:{config.port}/"
+         self.session_id = uuid.uuid4().hex
+         self._cached_speech_chunk_size = None
+
+     def _http_request(
+             self, path: str, method: str, payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+         data = json.dumps(payload).encode("utf-8")
+         req = urllib.request.Request(
+             self.base_url + path,
+             data=data,
+             headers={"Content-Type": "application/json"},
+             method=method,
+         )
+         with urllib.request.urlopen(req) as resp:
+             if resp.status == HTTPStatus.NO_CONTENT:
+                 return None
+             return json.loads(resp.read())
+
+     @staticmethod
+     def _to_incremental_outputs(json_dict: Dict[str, Any]):
+         return IncrementalOutput(
+             new_tokens=json_dict["new_tokens"],
+             new_string=json_dict["new_string"],
+             deleted_tokens=json_dict["deleted_tokens"],
+             deleted_string=json_dict["deleted_string"]
+         )
+
+     @property
+     def speech_chunk_size(self) -> float:
+         if self._cached_speech_chunk_size is None:
+             response = self._http_request("speech_chunk_size", "GET", {
+                 "session_id": self.session_id
+             })
+             self._cached_speech_chunk_size = response["speech_chunk_size"]
+         return self._cached_speech_chunk_size
+
+     def process_chunk(self, waveform: np.float32) -> IncrementalOutput:
+         response = self._http_request("process_chunk", "POST", {
+             "session_id": self.session_id,
+             "waveform": base64.b64encode(waveform.tobytes()).decode("utf-8"),
+         })
+         return self._to_incremental_outputs(response)
+
+     def set_source_language(self, language):
+         self._http_request("source_language", "PUT", {
+             "session_id": self.session_id,
+             "language": language,
+         })
+
+     def set_target_language(self, language):
+         self._http_request("target_language", "PUT", {
+             "session_id": self.session_id,
+             "language": language,
+         })
+
+     def end_of_stream(self) -> IncrementalOutput:
+         response = self._http_request("end_of_stream", "POST", {
+             "session_id": self.session_id,
+         })
+         return self._to_incremental_outputs(response)
+
+     def clear(self):
+         self._http_request("clear", "POST", {
+             "session_id": self.session_id,
+         })
+
+     def tokens_to_string(self, tokens: List[str]) -> str:
+         response = self._http_request("tokens_to_string", "GET", {
+             "session_id": self.session_id,
+             "tokens": tokens,
+         })
+         return response["tokens_as_string"]
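
For context, here is a minimal, illustrative sketch of how the proxy might be used. It assumes the base SpeechProcessor constructor accepts a config namespace carrying only hostname and port, and that an http_speech_processor_server instance is already listening at that address; the hostname, port, and 16 kHz sample rate are made-up values, not defaults from the package.

from types import SimpleNamespace

import numpy as np

from simulstream.server.speech_processors.remote.http_proxy_speech_processor import (
    HttpProxySpeechProcessor,
)

# Hypothetical connection settings for a running http_speech_processor_server.
config = SimpleNamespace(hostname="localhost", port=8000)
processor = HttpProxySpeechProcessor(config)  # one instance corresponds to one remote session

# One chunk of silence at an assumed 16 kHz sample rate; speech_chunk_size is fetched
# from the server on first access and cached locally.
chunk = np.zeros(int(16_000 * processor.speech_chunk_size), dtype=np.float32)
print(processor.process_chunk(chunk).new_string)

processor.end_of_stream()  # flush any pending output on the server side
processor.clear()          # close the remote session and free the pooled processor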
simulstream/server/speech_processors/remote/http_speech_processor_server.py ADDED
@@ -0,0 +1,221 @@
+ # Copyright 2026 FBK
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ #     http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License
+
+ import argparse
+ import base64
+ import json
+ import time
+ import logging
+ from functools import partial
+ from http import HTTPStatus
+ from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
+ from queue import Queue
+ import threading
+ from types import SimpleNamespace
+ from typing import Dict, Any, Optional
+
+ import numpy as np
+
+ import simulstream
+ from simulstream.config import yaml_config
+ from simulstream.server.speech_processors import build_speech_processor, SpeechProcessor
+
+
+ logging.basicConfig(
+     format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
+     datefmt='%Y-%m-%d %H:%M:%S',
+     level=logging.INFO,
+ )
+ LOGGER = logging.getLogger(
+     'simulstream.server.speech_processors.http.http_speech_processor_server')
+
+
+ class SpeechProcessorSessionManager:
+     def __init__(self, speech_processor_config: SimpleNamespace, size: int, ttl: float):
+         """
+         Args:
+             speech_processor_config: Configuration of the speech processors to create.
+             size: How many speech processors to use.
+             ttl: How long a session may stay idle before cleanup (in seconds).
+         """
+         self._sessions = {}
+         self._last_access = {}
+         self._lock = threading.Lock()
+         self.size = size
+         self.ttl = ttl
+         self.available = Queue(maxsize=size)
+         for _ in range(size):
+             self.available.put_nowait(build_speech_processor(speech_processor_config))
+
+         # starting cleanup loop
+         self._cleanup_stop_event = threading.Event()
+         self._cleanup_thread = threading.Thread(
+             target=self._cleanup,
+             daemon=True,
+         )
+         self._cleanup_thread.start()
+
+     def get(self, session_id) -> SpeechProcessor:
+         with self._lock:
+             if session_id not in self._sessions:
+                 self._sessions[session_id] = self.available.get_nowait()
+                 LOGGER.info(
+                     f"Speech processor allocated to {session_id}, speech processors available: "
+                     f"{self.available.qsize()}")
+             self._last_access[session_id] = time.time()
+             return self._sessions[session_id]
+
+     def is_active(self, session_id) -> bool:
+         with self._lock:
+             return session_id in self._sessions
+
+     def close_session(self, session_id):
+         with self._lock:
+             if session_id in self._sessions:
+                 speech_processor = self._sessions.pop(session_id)
+                 speech_processor.clear()
+                 self.available.put_nowait(speech_processor)
+                 LOGGER.info(
+                     f"Session {session_id} closed, speech processors available: "
+                     f"{self.available.qsize()}")
+             if session_id in self._last_access:
+                 self._last_access.pop(session_id)
+
+     def _cleanup(self):
+         while not self._cleanup_stop_event.is_set():
+             time.sleep(self.ttl)
+             now = time.time()
+             expired = []
+             with self._lock:
+                 for session_id in self._sessions.keys():
+                     if session_id not in self._last_access or \
+                             now - self._last_access[session_id] > self.ttl:
+                         expired.append(session_id)
+
+             for session_id in expired:
+                 self.close_session(session_id)
+
+     def shutdown(self) -> None:
+         self._cleanup_stop_event.set()
+         self._cleanup_thread.join()
+
+
+ class HttpSpeechProcessorHandler(BaseHTTPRequestHandler):
+     def __init__(
+             self, *args, speech_processor_manager: SpeechProcessorSessionManager = None, **kwargs):
+         self.speech_processor_manager = speech_processor_manager
+         super().__init__(*args, **kwargs)
+
+     def _read_json(self) -> dict:
+         length = int(self.headers.get("Content-Length", "0"))
+         data = self.rfile.read(length)
+         return json.loads(data)
+
+     def _send_json_response(self, code: int, message: Optional[Dict[str, Any]] = None):
+         self.send_response(code)
+         self.send_header("Content-type", "application/json; charset=utf-8")
+         self.end_headers()
+         if message is not None:
+             self.wfile.write(json.dumps(message).encode("utf-8"))
+         else:
+             self.wfile.write("".encode("utf-8"))
+
+     def do_GET(self):
+         function_handler = getattr(self, "get_" + self.path.strip("/"))
+         function_handler(**self._read_json())
+
+     def do_POST(self):
+         function_handler = getattr(self, "post_" + self.path.strip("/"))
+         function_handler(**self._read_json())
+
+     def do_PUT(self):
+         function_handler = getattr(self, "put_" + self.path.strip("/"))
+         function_handler(**self._read_json())
+
+     def get_speech_chunk_size(self, session_id):
+         processor = self.speech_processor_manager.get(session_id)
+         self._send_json_response(HTTPStatus.OK, {"speech_chunk_size": processor.speech_chunk_size})
+
+     def post_process_chunk(self, session_id, waveform):
+         processor = self.speech_processor_manager.get(session_id)
+         output = processor.process_chunk(
+             np.frombuffer(base64.b64decode(waveform), dtype=np.float32))
+         self._send_json_response(HTTPStatus.OK, {
+             "new_tokens": output.new_tokens,
+             "new_string": output.new_string,
+             "deleted_tokens": output.deleted_tokens,
+             "deleted_string": output.deleted_string,
+         })
+
+     def put_source_language(self, session_id, language):
+         processor = self.speech_processor_manager.get(session_id)
+         processor.set_source_language(language)
+         self._send_json_response(HTTPStatus.NO_CONTENT)
+
+     def put_target_language(self, session_id, language):
+         processor = self.speech_processor_manager.get(session_id)
+         processor.set_target_language(language)
+         self._send_json_response(HTTPStatus.NO_CONTENT)
+
+     def post_end_of_stream(self, session_id):
+         processor = self.speech_processor_manager.get(session_id)
+         output = processor.end_of_stream()
+         self._send_json_response(HTTPStatus.OK, {
+             "new_tokens": output.new_tokens,
+             "new_string": output.new_string,
+             "deleted_tokens": output.deleted_tokens,
+             "deleted_string": output.deleted_string,
+         })
+
+     def post_clear(self, session_id):
+         if self.speech_processor_manager.is_active(session_id):
+             self.speech_processor_manager.close_session(session_id)
+         self._send_json_response(HTTPStatus.NO_CONTENT)
+
+     def get_tokens_to_string(self, session_id, tokens):
+         processor = self.speech_processor_manager.get(session_id)
+         output = processor.tokens_to_string(tokens)
+         self._send_json_response(HTTPStatus.OK, {"tokens_as_string": output})
+
+
+ def serve(args: argparse.Namespace):
+     LOGGER.info(f"Loading server configuration from {args.server_config}")
+     server_config = yaml_config(args.server_config)
+     LOGGER.info(f"Loading speech processor from {args.speech_processor_config}")
+     speech_processor_loading_time = time.time()
+     speech_processor_session_manager = SpeechProcessorSessionManager(
+         yaml_config(args.speech_processor_config), server_config.pool_size, server_config.ttl
+     )
+     speech_processor_loading_time = time.time() - speech_processor_loading_time
+     LOGGER.info(f"Loaded speech processor in {speech_processor_loading_time:.3f} seconds")
+
+     custom_handler = partial(
+         HttpSpeechProcessorHandler, speech_processor_manager=speech_processor_session_manager)
+     httpd = ThreadingHTTPServer((server_config.hostname, server_config.port), custom_handler)
+     LOGGER.info(f"Serving on http://{server_config.hostname}:{server_config.port}")
+     httpd.serve_forever()
+     speech_processor_session_manager.shutdown()
+
+
+ def main():
+     LOGGER.info(f"HTTP speech processor server version: {simulstream.__version__}")
+     parser = argparse.ArgumentParser("http_speech_processor_server")
+     parser.add_argument("--server-config", type=str, default="config/http_server_example.yaml")
+     parser.add_argument("--speech-processor-config", type=str, required=True)
+     args = parser.parse_args()
+     serve(args)
+
+
+ if __name__ == "__main__":
+     main()
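
The handler maps each request path to a method named after the HTTP verb (do_GET/do_POST/do_PUT dispatch to get_*/post_*/put_* via getattr), and every endpoint, including the GET ones, reads its arguments from a JSON body that must carry the session_id. Below is a rough, illustrative sketch of driving the server with only the standard library; the address and session id are made-up, and the server itself would presumably be launched as a module (e.g. python -m simulstream.server.speech_processors.remote.http_speech_processor_server --speech-processor-config <config.yaml>), since entry_points.txt is unchanged in this release.

import base64
import json
import urllib.request

import numpy as np

BASE_URL = "http://localhost:8000/"  # hypothetical hostname/port from the server config


def call(path, method, payload):
    # Every endpoint, GET included, expects a JSON body carrying the session_id.
    request = urllib.request.Request(
        BASE_URL + path,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read()) if response.status == 200 else None


session = {"session_id": "demo-session"}
chunk_size = call("speech_chunk_size", "GET", session)["speech_chunk_size"]

# Send one chunk of silence at an assumed 16 kHz sample rate, then release the session
# so the pooled speech processor becomes available to other clients.
audio = np.zeros(int(16_000 * chunk_size), dtype=np.float32)
output = call("process_chunk", "POST",
              {**session, "waveform": base64.b64encode(audio.tobytes()).decode("utf-8")})
print(output["new_string"], output["deleted_string"])
call("clear", "POST", session)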
simulstream-0.1.0.dist-info/METADATA → simulstream-0.2.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: simulstream
- Version: 0.1.0
+ Version: 0.2.0
  Summary: A server to run simultaneous/streaming experiments and demo
  Author-email: Marco Gaido <mgaido@fbk.eu>, FBK HLT-MT <mt@fbk.eu>
  License: Apache License
simulstream-0.1.0.dist-info/RECORD → simulstream-0.2.0.dist-info/RECORD RENAMED
@@ -1,7 +1,8 @@
- docs/source/conf.py,sha256=aQYxz7x86AkqqgiPnPN8e83RY-nX7D-3WnqYGHRdfmI,1453
- simulstream/__init__.py,sha256=YAGVogIANy5VPzFvbYzs5m1L9PLy5XBu36Oe7dmGR2o,586
+ docs/source/conf.py,sha256=Z63SwVSOUPnUA7uVRSgulyu-FZLrxR1VzKF4S5vkky4,1493
+ simulstream/VERSION.txt,sha256=kR_AxIywxwYB21d1qb7xt0DcTMn5tGOJufBWP-frlNc,5
+ simulstream/__init__.py,sha256=-PIQAHsqzejXVxmPE9Z2BvkK_LUndIKlpe7qytDPW98,687
  simulstream/config.py,sha256=u9QocK9QgWKgvN5ZrNRv9sIqV0T40wTSC6Ak7VlfoAg,985
- simulstream/inference.py,sha256=YEc4ydAe-lumyUPL-BJD2cfvFmOzP3kEJajjjSN0-yg,6449
+ simulstream/inference.py,sha256=CdeOR5mfkFfYiCw-yPT9C3PsFhWO6m5mECjIgwYzT6c,6571
  simulstream/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  simulstream/client/wav_reader_client.py,sha256=1zHNdml9riZxKYM2cyAoUGP1-83XYmMEMioeKBlZH-Y,7861
  simulstream/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,15 +35,18 @@ simulstream/server/speech_processors/seamless_streamatt.py,sha256=_Z01FKPJEOSPX2
  simulstream/server/speech_processors/simuleval_wrapper.py,sha256=K-gljTmlkJJ9C_RDo34DzNV-dxJdo1xpP3s9mGFzZs0,6655
  simulstream/server/speech_processors/sliding_window_retranslation.py,sha256=h-zbF1Ydy41y34aAZKrmO0NVCh_jGhXYQ1Qdm0Bvxqc,6653
  simulstream/server/speech_processors/vad_wrapper.py,sha256=Ecd0dr_xHxl9kKdswR1-OcEZdPgH9HmEDzz0S6Okrzw,9251
- simulstream-0.1.0.dist-info/licenses/LICENSE,sha256=knu-Hk-hl9xNWU_Gn_6Msa5U8gHhDT8yIHW7_KrmdhE,11333
+ simulstream/server/speech_processors/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ simulstream/server/speech_processors/remote/http_proxy_speech_processor.py,sha256=o1atek9dJrAoAkGYDgjOhn22nesjvN1GEPAUHkYJY4E,4030
+ simulstream/server/speech_processors/remote/http_speech_processor_server.py,sha256=dE3oJE_u9AOOdKkyLSzvBcGNfvPnssz2GyP7J_ZIIB0,8768
+ simulstream-0.2.0.dist-info/licenses/LICENSE,sha256=knu-Hk-hl9xNWU_Gn_6Msa5U8gHhDT8yIHW7_KrmdhE,11333
  uts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  uts/utils.py,sha256=CXC0tahql0HZN4Cb2sQLyYsaW22sDz3n8gCvcVFyx2c,127
  uts/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  uts/metrics/log_reader.py,sha256=6xVFeGDJjl_66juMWl9M5itTZjGv6swowhnpaUwUFcU,2195
  uts/speech_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  uts/speech_processors/test_simuleval_wrapper.py,sha256=50U9a0DVc-X4ySYsrAooy4wYNM-Uwg0A2PnnMUQNTY8,3517
- simulstream-0.1.0.dist-info/METADATA,sha256=gaFMBeG_4LSuccVtuIaxHKGmxCKyhq_vx5TnduuLXI0,24508
- simulstream-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- simulstream-0.1.0.dist-info/entry_points.txt,sha256=5j84rLKVesw47qjaeELpz25XfzuSg_Zz0JLhcO3Y6Gg,479
- simulstream-0.1.0.dist-info/top_level.txt,sha256=DF787K6iu65z7cBQrCRze-l8w8s-ouExTI85-Vxyka8,21
- simulstream-0.1.0.dist-info/RECORD,,
+ simulstream-0.2.0.dist-info/METADATA,sha256=kHFRK_KwGcSA4xc9yi02gHpHTXjTo2GcO3Q0alSTLZk,24508
+ simulstream-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ simulstream-0.2.0.dist-info/entry_points.txt,sha256=5j84rLKVesw47qjaeELpz25XfzuSg_Zz0JLhcO3Y6Gg,479
+ simulstream-0.2.0.dist-info/top_level.txt,sha256=DF787K6iu65z7cBQrCRze-l8w8s-ouExTI85-Vxyka8,21
+ simulstream-0.2.0.dist-info/RECORD,,
simulstream-0.1.0.dist-info/WHEEL → simulstream-0.2.0.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: true
  Tag: py3-none-any
