livekit-plugins-nltk 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/nltk/__init__.py +4 -3
- livekit/plugins/nltk/py.typed +0 -0
- livekit/plugins/nltk/sentence_tokenizer.py +24 -10
- livekit/plugins/nltk/version.py +1 -1
- {livekit_plugins_nltk-0.2.0.dist-info → livekit_plugins_nltk-0.4.0.dist-info}/METADATA +3 -2
- livekit_plugins_nltk-0.4.0.dist-info/RECORD +8 -0
- {livekit_plugins_nltk-0.2.0.dist-info → livekit_plugins_nltk-0.4.0.dist-info}/WHEEL +1 -1
- livekit_plugins_nltk-0.2.0.dist-info/RECORD +0 -7
- {livekit_plugins_nltk-0.2.0.dist-info → livekit_plugins_nltk-0.4.0.dist-info}/top_level.txt +0 -0
livekit/plugins/nltk/__init__.py
CHANGED
@@ -13,7 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
|
16
|
-
from .sentence_tokenizer import
|
16
|
+
from .sentence_tokenizer import SentenceStream, SentenceTokenizer
|
17
17
|
from .version import __version__
|
18
18
|
|
19
19
|
__all__ = [
|
@@ -24,12 +24,13 @@ __all__ = [
|
|
24
24
|
|
25
25
|
|
26
26
|
from livekit.agents import Plugin
|
27
|
-
|
27
|
+
|
28
|
+
import nltk # type: ignore
|
28
29
|
|
29
30
|
|
30
31
|
class NltkPlugin(Plugin):
|
31
32
|
def __init__(self):
|
32
|
-
super().__init__(__name__, __version__)
|
33
|
+
super().__init__(__name__, __version__, __package__)
|
33
34
|
|
34
35
|
def download_files(self):
|
35
36
|
try:
|
File without changes
|
@@ -1,10 +1,14 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
from dataclasses import dataclass
|
4
|
-
import dataclasses
|
5
|
-
from typing import Optional
|
1
|
+
from __future__ import annotations
|
2
|
+
|
6
3
|
import asyncio
|
7
|
-
import
|
4
|
+
import dataclasses
|
5
|
+
import logging
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import List, Optional
|
8
|
+
|
9
|
+
from livekit import agents
|
10
|
+
|
11
|
+
import nltk # type: ignore
|
8
12
|
|
9
13
|
# nltk is using the punkt tokenizer
|
10
14
|
# https://www.nltk.org/_modules/nltk/tokenize/punkt.html
|
@@ -77,12 +81,17 @@ class SentenceStream(agents.tokenize.SentenceStream):
|
|
77
81
|
self._language = language
|
78
82
|
self._context_len = context_len
|
79
83
|
self._min_sentence_len = min_sentence_len
|
80
|
-
self._event_queue = asyncio.Queue()
|
84
|
+
self._event_queue = asyncio.Queue[agents.tokenize.SegmentedSentence | None]()
|
85
|
+
self._closed = False
|
81
86
|
|
82
|
-
self._incomplete_sentences = [] # <= min_sentence_len
|
87
|
+
self._incomplete_sentences: List[str] = [] # <= min_sentence_len
|
83
88
|
self._buffer = ""
|
84
89
|
|
85
90
|
def push_text(self, text: str) -> None:
|
91
|
+
if self._closed:
|
92
|
+
logging.error("Cannot push text to closed stream")
|
93
|
+
return
|
94
|
+
|
86
95
|
for char in text:
|
87
96
|
self._buffer += char
|
88
97
|
|
@@ -118,11 +127,16 @@ class SentenceStream(agents.tokenize.SentenceStream):
|
|
118
127
|
if buff:
|
119
128
|
await self._event_queue.put(agents.tokenize.SegmentedSentence(text=buff))
|
120
129
|
|
130
|
+
async def aclose(self) -> None:
|
131
|
+
self._closed = True
|
132
|
+
self._event_queue.put_nowait(None)
|
133
|
+
|
121
134
|
async def __anext__(self) -> agents.tokenize.SegmentedSentence:
|
122
|
-
|
135
|
+
event = await self._event_queue.get()
|
136
|
+
if event is None:
|
123
137
|
raise StopAsyncIteration
|
124
138
|
|
125
|
-
return
|
139
|
+
return event
|
126
140
|
|
127
141
|
def __aiter__(self) -> "SentenceStream":
|
128
142
|
return self
|
livekit/plugins/nltk/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-nltk
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Agent Framework plugin for NLTK-based text processing.
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -18,8 +18,9 @@ Classifier: Programming Language :: Python :: 3.10
|
|
18
18
|
Classifier: Programming Language :: Python :: 3 :: Only
|
19
19
|
Requires-Python: >=3.9.0
|
20
20
|
Description-Content-Type: text/markdown
|
21
|
-
Requires-Dist: livekit
|
21
|
+
Requires-Dist: livekit ~=0.9
|
22
22
|
Requires-Dist: nltk <4,>=3
|
23
|
+
Requires-Dist: livekit-agents ~=0.5.dev0
|
23
24
|
|
24
25
|
# LiveKit Plugins NLTK
|
25
26
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
livekit/plugins/nltk/__init__.py,sha256=HGbaUwK-0cU-SbvbEl2WSQKwNrkHn8XuO-86Hiy0cy4,1134
|
2
|
+
livekit/plugins/nltk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
livekit/plugins/nltk/sentence_tokenizer.py,sha256=1Oy0pmvCiD_vHepi-Vp6lEKEOFS1FdHjO9bvBX8dPCk,4561
|
4
|
+
livekit/plugins/nltk/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
|
5
|
+
livekit_plugins_nltk-0.4.0.dist-info/METADATA,sha256=xzUdecj4xBjWc9nyYXddYgNOQ2ES1DlE2ZPzeRdN8fY,1184
|
6
|
+
livekit_plugins_nltk-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
7
|
+
livekit_plugins_nltk-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
8
|
+
livekit_plugins_nltk-0.4.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
livekit/plugins/nltk/__init__.py,sha256=89A9weAyFd0CZcdfrgppDBYAroGroPMv7tkrspVgFGY,1104
|
2
|
-
livekit/plugins/nltk/sentence_tokenizer.py,sha256=QOB0Vaibrq2-TbQScuUZQf8mdIbVTu1G287JXz3xw8I,4202
|
3
|
-
livekit/plugins/nltk/version.py,sha256=cLFCdnm5S21CiJ5UJBcqfRvvFkCQ8p6M5fFUJVJkEiM,600
|
4
|
-
livekit_plugins_nltk-0.2.0.dist-info/METADATA,sha256=jS1I0K2KEk0jwvPCckbD1s35DM4_EwNsVOlVw7_Svdc,1145
|
5
|
-
livekit_plugins_nltk-0.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
-
livekit_plugins_nltk-0.2.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
7
|
-
livekit_plugins_nltk-0.2.0.dist-info/RECORD,,
|
File without changes
|