PyPI - simulstream - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

simulstream 0.1.0.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

{simulstream-0.1.0/simulstream.egg-info → simulstream-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: simulstream
-Version: 0.1.0
+Version: 0.2.0
 Summary: A server to run simultaneous/streaming experiments and demo
 Author-email: Marco Gaido <mgaido@fbk.eu>, FBK HLT-MT <mt@fbk.eu>
 License:                                  Apache License

{simulstream-0.1.0 → simulstream-0.2.0}/docs/source/conf.py RENAMED Viewed

@@ -4,6 +4,7 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 import os
 import sys
+from simulstream import __version__
 sys.path.insert(0, os.path.abspath('../../'))
@@ -13,7 +14,7 @@ sys.path.insert(0, os.path.abspath('../../'))
 project = 'simulstream'
 copyright = '2025, FBK'
 author = 'Marco Gaido, FBK MT Unit'
-release = '0.1.0'
+release = __version__
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

{simulstream-0.1.0 → simulstream-0.2.0}/pyproject.toml RENAMED Viewed

@@ -66,7 +66,7 @@ eval = [
 ]
 [tool.setuptools.dynamic]
-version = {attr = "simulstream.__version__"}
+version = {file = "simulstream/VERSION.txt"}
 # ---- Explicit project build information ---- #

{simulstream-0.1.0 → simulstream-0.2.0}/simulstream/__init__.py RENAMED Viewed

@@ -12,4 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
-__version__ = '0.1.0'
+from pathlib import Path
+with Path(__file__).with_name('VERSION.txt').open('r') as f:
+    __version__ = f.read().strip()

{simulstream-0.1.0 → simulstream-0.2.0}/simulstream/inference.py RENAMED Viewed

@@ -49,8 +49,10 @@ def process_audio(
         sample_rate (int): Audio sample rate (Hz).
         data (np.ndarray): Audio samples as int16 array.
     """
+    # speech_chunk_size is expressed in seconds, so the number of samples corresponding to
+    # one speech chunk is the following
     samples_per_chunk = int(
-        sample_rate * message_processor.speech_processor.speech_chunk_size / 1000.0)
+        sample_rate * message_processor.speech_processor.speech_chunk_size)
     i = 0
     for i in range(0, len(data), samples_per_chunk):
         output = message_processor.process_speech(data[i:i + samples_per_chunk].tobytes())

simulstream-0.2.0/simulstream/server/speech_processors/remote/http_proxy_speech_processor.py ADDED Viewed

@@ -0,0 +1,115 @@
+# Copyright 2026 FBK
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+import base64
+import json
+from http import HTTPStatus
+from typing import List, Any, Dict, Optional
+import uuid
+import urllib.request
+import numpy as np
+from simulstream.server.speech_processors import SpeechProcessor, IncrementalOutput
+class HttpProxySpeechProcessor(SpeechProcessor):
+    """
+    HTTP-based proxy implementation of :class:`SpeechProcessor`.
+    This class does not perform speech processing locally. Instead, it forwards
+    all method calls to a remote speech processor exposed via HTTP, maintaining
+    a dedicated session on the server side.
+    Each instance of this class corresponds to exactly one remote session.
+    """
+    @classmethod
+    def load_model(cls, config):
+        pass
+    def __init__(self, config):
+        super().__init__(config)
+        self.base_url = f"http://{config.hostname}:{config.port}/"
+        self.session_id = uuid.uuid4().hex
+        self._cached_speech_chunk_size = None
+    def _http_request(
+            self, path: str, method: str, payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        data = json.dumps(payload).encode("utf-8")
+        req = urllib.request.Request(
+            self.base_url + path,
+            data=data,
+            headers={"Content-Type": "application/json"},
+            method=method,
+        )
+        with urllib.request.urlopen(req) as resp:
+            if resp.status == HTTPStatus.NO_CONTENT:
+                return None
+            return json.loads(resp.read())
+    @staticmethod
+    def _to_incremental_outputs(json_dict: Dict[str, Any]):
+        return IncrementalOutput(
+            new_tokens=json_dict["new_tokens"],
+            new_string=json_dict["new_string"],
+            deleted_tokens=json_dict["deleted_tokens"],
+            deleted_string=json_dict["deleted_string"]
+        )
+    @property
+    def speech_chunk_size(self) -> float:
+        if self._cached_speech_chunk_size is None:
+            response = self._http_request("speech_chunk_size", "GET", {
+                "session_id": self.session_id
+            })
+            self._cached_speech_chunk_size = response["speech_chunk_size"]
+        return self._cached_speech_chunk_size
+    def process_chunk(self, waveform: np.float32) -> IncrementalOutput:
+        response = self._http_request("process_chunk", "POST", {
+            "session_id": self.session_id,
+            "waveform": base64.b64encode(waveform.tobytes()).decode("utf-8"),
+        })
+        return self._to_incremental_outputs(response)
+    def set_source_language(self, language):
+        self._http_request("source_language", "PUT", {
+            "session_id": self.session_id,
+            "language": language,
+        })
+    def set_target_language(self, language):
+        self._http_request("target_language", "PUT", {
+            "session_id": self.session_id,
+            "language": language,
+        })
+    def end_of_stream(self) -> IncrementalOutput:
+        response = self._http_request("end_of_stream", "POST", {
+            "session_id": self.session_id,
+        })
+        return self._to_incremental_outputs(response)
+    def clear(self):
+        self._http_request("clear", "POST", {
+            "session_id": self.session_id,
+        })
+    def tokens_to_string(self, tokens: List[str]) -> str:
+        response = self._http_request("tokens_to_string", "GET", {
+            "session_id": self.session_id,
+            "tokens": tokens,
+        })
+        return response["tokens_as_string"]

simulstream-0.2.0/simulstream/server/speech_processors/remote/http_speech_processor_server.py ADDED Viewed

@@ -0,0 +1,221 @@
+# Copyright 2026 FBK
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+import argparse
+import base64
+import json
+import time
+import logging
+from functools import partial
+from http import HTTPStatus
+from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
+from queue import Queue
+import threading
+from types import SimpleNamespace
+from typing import Dict, Any, Optional
+import numpy as np
+import simulstream
+from simulstream.config import yaml_config
+from simulstream.server.speech_processors import build_speech_processor, SpeechProcessor
+logging.basicConfig(
+    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    level=logging.INFO,
+)
+LOGGER = logging.getLogger(
+    'simulstream.server.speech_processors.http.http_speech_processor_server')
+class SpeechProcessorSessionManager:
+    def __init__(self, speech_processor_config: SimpleNamespace, size: int, ttl: float):
+        """
+        Args:
+            speech_processor_config: Configuration of the speech processors to create.
+            size: How many speech processors to use.
+            ttl: How long a session may stay idle before cleanup (in seconds).
+        """
+        self._sessions = {}
+        self._last_access = {}
+        self._lock = threading.Lock()
+        self.size = size
+        self.ttl = ttl
+        self.available = Queue(maxsize=size)
+        for _ in range(size):
+            self.available.put_nowait(build_speech_processor(speech_processor_config))
+        # starting cleanup loop
+        self._cleanup_stop_event = threading.Event()
+        self._cleanup_thread = threading.Thread(
+            target=self._cleanup,
+            daemon=True,
+        )
+        self._cleanup_thread.start()
+    def get(self, session_id) -> SpeechProcessor:
+        with self._lock:
+            if session_id not in self._sessions:
+                self._sessions[session_id] = self.available.get_nowait()
+                LOGGER.info(
+                    f"Speech processor allocated to {session_id}, speech processors available: "
+                    f"{self.available.qsize()}")
+            self._last_access[session_id] = time.time()
+            return self._sessions[session_id]
+    def is_active(self, session_id) -> bool:
+        with self._lock:
+            return session_id in self._sessions
+    def close_session(self, session_id):
+        with self._lock:
+            if session_id in self._sessions:
+                speech_processor = self._sessions.pop(session_id)
+                speech_processor.clear()
+                self.available.put_nowait(speech_processor)
+                LOGGER.info(
+                    f"Session {session_id} closed, speech processors available: "
+                    f"{self.available.qsize()}")
+            if session_id in self._last_access:
+                self._last_access.pop(session_id)
+    def _cleanup(self):
+        while not self._cleanup_stop_event.is_set():
+            time.sleep(self.ttl)
+            now = time.time()
+            expired = []
+            with self._lock:
+                for session_id in self._sessions.keys():
+                    if session_id not in self._last_access or \
+                            now - self._last_access[session_id] > self.ttl:
+                        expired.append(session_id)
+            for session_id in expired:
+                self.close_session(session_id)
+    def shutdown(self) -> None:
+        self._cleanup_stop_event.set()
+        self._cleanup_thread.join()
+class HttpSpeechProcessorHandler(BaseHTTPRequestHandler):
+    def __init__(
+            self, *args, speech_processor_manager: SpeechProcessorSessionManager = None, **kwargs):
+        self.speech_processor_manager = speech_processor_manager
+        super().__init__(*args, **kwargs)
+    def _read_json(self) -> dict:
+        length = int(self.headers.get("Content-Length", "0"))
+        data = self.rfile.read(length)
+        return json.loads(data)
+    def _send_json_response(self, code: int, message: Optional[Dict[str, Any]] = None):
+        self.send_response(code)
+        self.send_header("Content-type", "application/json; charset=utf-8")
+        self.end_headers()
+        if message is not None:
+            self.wfile.write(json.dumps(message).encode("utf-8"))
+        else:
+            self.wfile.write("".encode("utf-8"))
+    def do_GET(self):
+        function_handler = getattr(self, "get_" + self.path.strip("/"))
+        function_handler(**self._read_json())
+    def do_POST(self):
+        function_handler = getattr(self, "post_" + self.path.strip("/"))
+        function_handler(**self._read_json())
+    def do_PUT(self):
+        function_handler = getattr(self, "put_" + self.path.strip("/"))
+        function_handler(**self._read_json())
+    def get_speech_chunk_size(self, session_id):
+        processor = self.speech_processor_manager.get(session_id)
+        self._send_json_response(HTTPStatus.OK, {"speech_chunk_size": processor.speech_chunk_size})
+    def post_process_chunk(self, session_id, waveform):
+        processor = self.speech_processor_manager.get(session_id)
+        output = processor.process_chunk(
+            np.frombuffer(base64.b64decode(waveform), dtype=np.float32))
+        self._send_json_response(HTTPStatus.OK, {
+            "new_tokens": output.new_tokens,
+            "new_string": output.new_string,
+            "deleted_tokens": output.deleted_tokens,
+            "deleted_string": output.deleted_string,
+        })
+    def put_source_language(self, session_id, language):
+        processor = self.speech_processor_manager.get(session_id)
+        processor.set_source_language(language)
+        self._send_json_response(HTTPStatus.NO_CONTENT)
+    def put_target_language(self, session_id, language):
+        processor = self.speech_processor_manager.get(session_id)
+        processor.set_target_language(language)
+        self._send_json_response(HTTPStatus.NO_CONTENT)
+    def post_end_of_stream(self, session_id):
+        processor = self.speech_processor_manager.get(session_id)
+        output = processor.end_of_stream()
+        self._send_json_response(HTTPStatus.OK, {
+            "new_tokens": output.new_tokens,
+            "new_string": output.new_string,
+            "deleted_tokens": output.deleted_tokens,
+            "deleted_string": output.deleted_string,
+        })
+    def post_clear(self, session_id):
+        if self.speech_processor_manager.is_active(session_id):
+            self.speech_processor_manager.close_session(session_id)
+        self._send_json_response(HTTPStatus.NO_CONTENT)
+    def get_tokens_to_string(self, session_id, tokens):
+        processor = self.speech_processor_manager.get(session_id)
+        output = processor.tokens_to_string(tokens)
+        self._send_json_response(HTTPStatus.OK, {"tokens_as_string": output})
+def serve(args: argparse.Namespace):
+    LOGGER.info(f"Loading server configuration from {args.server_config}")
+    server_config = yaml_config(args.server_config)
+    LOGGER.info(f"Loading speech processor from {args.speech_processor_config}")
+    speech_processor_loading_time = time.time()
+    speech_processor_session_manager = SpeechProcessorSessionManager(
+        yaml_config(args.speech_processor_config), server_config.pool_size, server_config.ttl
+    )
+    speech_processor_loading_time = time.time() - speech_processor_loading_time
+    LOGGER.info(f"Loaded speech processor in {speech_processor_loading_time:.3f} seconds")
+    custom_handler = partial(
+        HttpSpeechProcessorHandler, speech_processor_manager=speech_processor_session_manager)
+    httpd = ThreadingHTTPServer((server_config.hostname, server_config.port), custom_handler)
+    LOGGER.info(f"Serving on http://{server_config.hostname}:{server_config.port}")
+    httpd.serve_forever()
+    speech_processor_session_manager.shutdown()
+def main():
+    LOGGER.info(f"HTTP speech processor server version: {simulstream.__version__}")
+    parser = argparse.ArgumentParser("http_speech_processor_server")
+    parser.add_argument("--server-config", type=str, default="config/http_server_example.yaml")
+    parser.add_argument("--speech-processor-config", type=str, required=True)
+    args = parser.parse_args()
+    serve(args)
+if __name__ == "__main__":
+    main()

simulstream-0.2.0/simulstream/version.txt ADDED Viewed

@@ -0,0 +1 @@
+0.2.0

{simulstream-0.1.0 → simulstream-0.2.0/simulstream.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: simulstream
-Version: 0.1.0
+Version: 0.2.0
 Summary: A server to run simultaneous/streaming experiments and demo
 Author-email: Marco Gaido <mgaido@fbk.eu>, FBK HLT-MT <mt@fbk.eu>
 License:                                  Apache License

{simulstream-0.1.0 → simulstream-0.2.0}/simulstream.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,9 +2,11 @@ LICENSE
 README.md
 pyproject.toml
 docs/source/conf.py
+simulstream/VERSION.txt
 simulstream/__init__.py
 simulstream/config.py
 simulstream/inference.py
+simulstream/version.txt
 simulstream.egg-info/PKG-INFO
 simulstream.egg-info/SOURCES.txt
 simulstream.egg-info/dependency_links.txt
@@ -43,6 +45,9 @@ simulstream/server/speech_processors/seamless_streamatt.py
 simulstream/server/speech_processors/simuleval_wrapper.py
 simulstream/server/speech_processors/sliding_window_retranslation.py
 simulstream/server/speech_processors/vad_wrapper.py
+simulstream/server/speech_processors/remote/__init__.py
+simulstream/server/speech_processors/remote/http_proxy_speech_processor.py
+simulstream/server/speech_processors/remote/http_speech_processor_server.py
 uts/__init__.py
 uts/utils.py
 uts/metrics/__init__.py