PyPI - livekit-plugins-turn-detector - Versions diffs - 0.3.1__tar.gz - Mend

livekit-plugins-turn-detector 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

livekit_plugins_turn_detector-0.3.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,27 @@
+Metadata-Version: 2.1
+Name: livekit-plugins-turn-detector
+Version: 0.3.1
+Summary: End of utterance detection for LiveKit Agents
+Home-page: https://github.com/livekit/agents
+License: Apache-2.0
+Project-URL: Documentation, https://docs.livekit.io
+Project-URL: Website, https://livekit.io/
+Project-URL: Source, https://github.com/livekit/agents
+Keywords: webrtc,realtime,audio,video,livekit
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3 :: Only
+Requires-Python: >=3.9.0
+Description-Content-Type: text/markdown
+Requires-Dist: livekit-agents>=0.11
+Requires-Dist: transformers>=4.46
+Requires-Dist: numpy>=1.26
+# LiveKit Plugins Turn Detector

livekit_plugins_turn_detector-0.3.1/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # LiveKit Plugins Turn Detector
2	+

livekit_plugins_turn_detector-0.3.1/livekit/plugins/turn_detector/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from livekit.agents import Plugin
+from livekit.agents.inference_runner import _InferenceRunner
+from .eou import EOUModel, _EUORunner
+from .log import logger
+from .version import __version__
+# Names exported by a star-import of this package; EOUPlugin is not listed here.
+__all__ = ["EOUModel", "__version__"]
+class EOUPlugin(Plugin):
+    """Plugin entry for the end-of-utterance detector package."""
+    def __init__(self):
+        """Pass this module's name, version, package and logger to the base Plugin."""
+        super().__init__(__name__, __version__, __package__, logger)
+    def download_files(self) -> None:
+        """Call ``from_pretrained`` for the model and the tokenizer named by ``HG_MODEL``.
+        The imports are done inside the method rather than at module import time.
+        """
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from .eou import HG_MODEL
+        # Model first, then tokenizer.
+        for hf_loader in (AutoModelForCausalLM, AutoTokenizer):
+            hf_loader.from_pretrained(HG_MODEL)
+# Import-time side effects: register one EOUPlugin instance with Plugin, and
+# register the _EUORunner class with _InferenceRunner.
+Plugin.register_plugin(EOUPlugin())
+_InferenceRunner.register_runner(_EUORunner)

livekit_plugins_turn_detector-0.3.1/livekit/plugins/turn_detector/eou.py ADDED Viewed

@@ -0,0 +1,158 @@
+from __future__ import annotations
+import json
+import string
+import time
+import numpy as np
+from livekit.agents import llm
+from livekit.agents.inference_runner import _InferenceRunner
+from livekit.agents.ipc.inference_executor import InferenceExecutor
+from livekit.agents.job import get_current_job_context
+from .log import logger
+# Model identifier passed to AutoModelForCausalLM / AutoTokenizer.from_pretrained.
+HG_MODEL = "livekit/opt-125m-endpoint-detector-2"
+# Characters removed during text normalisation: all ASCII punctuation except the apostrophe.
+PUNCS = string.punctuation.replace("'", "")
+# Only the last MAX_HISTORY collected messages are sent for inference.
+MAX_HISTORY = 4
+def _softmax(logits: np.ndarray) -> np.ndarray:
+    """Return the softmax of ``logits``, normalised over all of its elements.
+    The maximum is subtracted before exponentiating so large logits do not overflow.
+    """
+    shifted = logits - np.max(logits)
+    weights = np.exp(shifted)
+    total = np.sum(weights)
+    return weights / total
+class _EUORunner(_InferenceRunner):
+    """Inference runner that scores a chat context for end of utterance.
+    ``initialize`` loads the model and tokenizer from local files only; ``run``
+    takes a JSON payload with a ``chat_ctx`` list and returns the probability
+    the model assigns to ``<|im_end|>`` as the next token.
+    """
+    INFERENCE_METHOD = "lk_end_of_utterance"
+    def _normalize(self, text):
+        """Strip punctuation (apostrophes are kept), lowercase, and collapse whitespace."""
+        def strip_puncs(text):
+            return text.translate(str.maketrans("", "", PUNCS))
+        return " ".join(strip_puncs(text).lower().split())
+    def _format_chat_ctx(self, chat_ctx: list[dict]):
+        """Render the messages with the tokenizer's chat template, leaving the last one open.
+        Args:
+            chat_ctx: Message dicts, each with a ``"content"`` string. The dicts
+                passed in are not modified.
+        Returns:
+            The templated text cut just before the final ``<|im_end|>`` marker,
+            or the full text when no marker is present.
+        """
+        new_chat_ctx = []
+        for msg in chat_ctx:
+            content = self._normalize(msg["content"])
+            if not content:
+                continue
+            # Build a copy instead of assigning into ``msg`` so the caller's data stays untouched.
+            new_chat_ctx.append({**msg, "content": content})
+        convo_text = self._tokenizer.apply_chat_template(
+            new_chat_ctx,
+            add_generation_prompt=False,
+            add_special_tokens=False,
+            tokenize=False,
+        )
+        # remove the EOU token from current utterance
+        ix = convo_text.rfind("<|im_end|>")
+        if ix == -1:
+            # No marker rendered; slicing with -1 would drop the last character.
+            return convo_text
+        return convo_text[:ix]
+    def initialize(self) -> None:
+        """Load model and tokenizer from local files and look up the ``<|im_end|>`` token id.
+        Raises:
+            RuntimeError: If loading fails with LocalEntryNotFoundError or OSError.
+        """
+        from huggingface_hub import errors
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        try:
+            self._model = AutoModelForCausalLM.from_pretrained(
+                HG_MODEL, local_files_only=True
+            )
+            self._tokenizer = AutoTokenizer.from_pretrained(
+                HG_MODEL, local_files_only=True
+            )
+            # Last id of the encoded marker is used as the end-of-utterance token index.
+            self._eou_index = self._tokenizer.encode("<|im_end|>")[-1]
+        except (errors.LocalEntryNotFoundError, OSError):
+            logger.error(
+                (
+                    f"Could not find model {HG_MODEL}. Make sure you have downloaded the model before running the agent. "
+                    "Use `python3 your_agent.py download-files` to download the models."
+                )
+            )
+            raise RuntimeError(
+                f"livekit-plugins-turn-detector initialization failed. Could not find model {HG_MODEL}."
+            ) from None
+    def run(self, data: bytes) -> bytes | None:
+        """Score one inference request.
+        Args:
+            data: JSON-encoded object holding a non-empty ``"chat_ctx"`` list.
+        Returns:
+            JSON bytes of the form ``{"eou_probability": <float>}``.
+        Raises:
+            ValueError: If ``chat_ctx`` is missing or empty.
+        """
+        data_json = json.loads(data)
+        chat_ctx = data_json.get("chat_ctx")
+        if not chat_ctx:
+            raise ValueError("chat_ctx is required on the inference input data")
+        start_time = time.perf_counter()
+        text = self._format_chat_ctx(chat_ctx)
+        inputs = self._tokenizer(
+            text,
+            add_special_tokens=False,
+            return_tensors="pt",
+        )
+        outputs = self._model(**inputs)
+        # Logits at the last position of the first sequence in the batch.
+        logits = outputs.logits[0, -1, :].detach().numpy()
+        output_probs = _softmax(logits)
+        # Convert to a builtin float once so the log record and the JSON reply
+        # carry the same plainly serialisable value.
+        eou_probability = float(output_probs[self._eou_index])
+        end_time = time.perf_counter()
+        logger.debug(
+            "eou prediction",
+            extra={
+                "eou_probability": eou_probability,
+                "input": text,
+                "duration": round(end_time - start_time, 3),
+            },
+        )
+        return json.dumps({"eou_probability": eou_probability}).encode()
+class EOUModel:
+    """Sends chat contexts to an inference executor and returns the end-of-utterance score."""
+    def __init__(self, inference_executor: InferenceExecutor | None = None) -> None:
+        """Use ``inference_executor`` when given, otherwise the current job context's executor."""
+        self._executor = (
+            inference_executor or get_current_job_context().inference_executor
+        )
+    async def predict_eou(self, chat_ctx: llm.ChatContext) -> float:
+        """Return the ``eou_probability`` the runner reports for ``chat_ctx``.
+        Only "user" and "assistant" messages are considered. For list content
+        the first string item is used. Only the last ``MAX_HISTORY`` collected
+        messages are sent.
+        Raises:
+            RuntimeError: If the executor returns no result.
+        """
+        messages = []
+        for msg in chat_ctx.messages:
+            if msg.role not in ("user", "assistant"):
+                continue
+            if isinstance(msg.content, str):
+                messages.append({"role": msg.role, "content": msg.content})
+            elif isinstance(msg.content, list):
+                # Take only the first plain-string item of multi-part content.
+                first_text = next(
+                    (cnt for cnt in msg.content if isinstance(cnt, str)), None
+                )
+                if first_text is not None:
+                    messages.append({"role": msg.role, "content": first_text})
+        messages = messages[-MAX_HISTORY:]
+        json_data = json.dumps({"chat_ctx": messages}).encode()
+        result = await self._executor.do_inference(
+            _EUORunner.INFERENCE_METHOD, json_data
+        )
+        # Explicit check instead of ``assert``: asserts are removed under ``python -O``,
+        # which would turn a missing result into an AttributeError on ``.decode()``.
+        if result is None:
+            raise RuntimeError(
+                "end_of_utterance prediction should always return a result"
+            )
+        result_json = json.loads(result.decode())
+        return result_json["eou_probability"]

livekit_plugins_turn_detector-0.3.1/livekit/plugins/turn_detector/log.py ADDED Viewed

@@ -0,0 +1,3 @@
+import logging
+# Package logger; note the logger name is "livekit.plugins.eou", not the package path.
+logger = logging.getLogger("livekit.plugins.eou")

livekit_plugins_turn_detector-0.3.1/livekit/plugins/turn_detector/version.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright 2023 LiveKit, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Package version; setup.py reads it by executing this file.
+__version__ = "0.3.1"

livekit_plugins_turn_detector-0.3.1/livekit_plugins_turn_detector.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,27 @@
+Metadata-Version: 2.1
+Name: livekit-plugins-turn-detector
+Version: 0.3.1
+Summary: End of utterance detection for LiveKit Agents
+Home-page: https://github.com/livekit/agents
+License: Apache-2.0
+Project-URL: Documentation, https://docs.livekit.io
+Project-URL: Website, https://livekit.io/
+Project-URL: Source, https://github.com/livekit/agents
+Keywords: webrtc,realtime,audio,video,livekit
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3 :: Only
+Requires-Python: >=3.9.0
+Description-Content-Type: text/markdown
+Requires-Dist: livekit-agents>=0.11
+Requires-Dist: transformers>=4.46
+Requires-Dist: numpy>=1.26
+# LiveKit Plugins Turn Detector

livekit_plugins_turn_detector-0.3.1/livekit_plugins_turn_detector.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+README.md
+pyproject.toml
+setup.py
+livekit/plugins/turn_detector/__init__.py
+livekit/plugins/turn_detector/eou.py
+livekit/plugins/turn_detector/log.py
+livekit/plugins/turn_detector/version.py
+livekit_plugins_turn_detector.egg-info/PKG-INFO
+livekit_plugins_turn_detector.egg-info/SOURCES.txt
+livekit_plugins_turn_detector.egg-info/dependency_links.txt
+livekit_plugins_turn_detector.egg-info/requires.txt
+livekit_plugins_turn_detector.egg-info/top_level.txt

livekit_plugins_turn_detector-0.3.1/livekit_plugins_turn_detector.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

livekit_plugins_turn_detector-0.3.1/livekit_plugins_turn_detector.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,3 @@
+livekit-agents>=0.11
+transformers>=4.46
+numpy>=1.26

livekit_plugins_turn_detector-0.3.1/livekit_plugins_turn_detector.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ livekit

livekit_plugins_turn_detector-0.3.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"

livekit_plugins_turn_detector-0.3.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

livekit_plugins_turn_detector-0.3.1/setup.py ADDED Viewed

@@ -0,0 +1,59 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import pathlib
+import setuptools
+import setuptools.command.build_py
+here = pathlib.Path(__file__).parent.resolve()
+# Read ``__version__`` by executing version.py into ``about``.
+about = {}
+with open(
+    os.path.join(here, "livekit", "plugins", "turn_detector", "version.py"),
+    "r",
+    encoding="utf-8",
+) as f:
+    exec(f.read(), about)
+setuptools.setup(
+    name="livekit-plugins-turn-detector",
+    version=about["__version__"],
+    description="End of utterance detection for LiveKit Agents",
+    long_description=(here / "README.md").read_text(encoding="utf-8"),
+    long_description_content_type="text/markdown",
+    url="https://github.com/livekit/agents",
+    cmdclass={},
+    classifiers=[
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: Apache Software License",
+        "Topic :: Multimedia :: Sound/Audio",
+        "Topic :: Multimedia :: Video",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3 :: Only",
+    ],
+    keywords=["webrtc", "realtime", "audio", "video", "livekit"],
+    license="Apache-2.0",
+    packages=setuptools.find_namespace_packages(include=["livekit.*"]),
+    python_requires=">=3.9.0",
+    install_requires=["livekit-agents>=0.11", "transformers>=4.46", "numpy>=1.26"],
+    # package_data is keyed by package name; the package in this distribution is
+    # livekit.plugins.turn_detector, so py.typed must be listed under that key.
+    package_data={"livekit.plugins.turn_detector": ["py.typed"]},
+    project_urls={
+        "Documentation": "https://docs.livekit.io",
+        "Website": "https://livekit.io/",
+        "Source": "https://github.com/livekit/agents",
+    },
+)