sales_model-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +0 -0
- app/auth/__init__.py +0 -0
- app/auth/api_keys.py +290 -0
- app/auth/jwt.py +103 -0
- app/auth/rate_limit.py +41 -0
- app/auth/rate_limiter.py +354 -0
- app/auth/security.py +367 -0
- app/billing/__init__.py +24 -0
- app/billing/usage.py +488 -0
- app/dashboard/__init__.py +1 -0
- app/dashboard/data.py +139 -0
- app/dashboard/data_backup.py +942 -0
- app/dashboard/models.py +387 -0
- app/dashboard/postgres_data.py +1208 -0
- app/dashboard/routes.py +1006 -0
- app/main.py +587 -0
- app/main_v2.py +693 -0
- app/observability/__init__.py +0 -0
- app/observability/logging.py +23 -0
- app/observability/metrics.py +9 -0
- app/observability/tracing.py +5 -0
- app/providers/__init__.py +0 -0
- app/providers/azure_foundry_stt.py +111 -0
- app/providers/azure_foundry_tts.py +123 -0
- app/providers/llm_base.py +15 -0
- app/providers/null_stt.py +28 -0
- app/providers/null_tts.py +13 -0
- app/providers/stt_base.py +27 -0
- app/providers/tts_base.py +8 -0
- app/sales_brain/__init__.py +0 -0
- app/sales_brain/brain.py +26 -0
- app/sales_brain/chunker.py +48 -0
- app/storage/__init__.py +0 -0
- app/storage/database.py +761 -0
- app/storage/postgres.py +17 -0
- app/storage/redis.py +176 -0
- app/storage/schema.sql +319 -0
- app/utils/__init__.py +1 -0
- app/utils/latency.py +323 -0
- app/voice/__init__.py +0 -0
- app/voice/audio.py +8 -0
- app/voice/session.py +225 -0
- app/voice/ssml.py +32 -0
- app/voice/vad.py +6 -0
- app/voice/voicelive.py +324 -0
- app/voice/ws.py +144 -0
- app/webui/app.js +384 -0
- app/webui/index.html +90 -0
- app/webui/styles.css +267 -0
- sales_model/__init__.py +8 -0
- sales_model/ai.py +54 -0
- sales_model/cli.py +51 -0
- sales_model/config.py +37 -0
- sales_model/context_utils.py +170 -0
- sales_model/crm.py +20 -0
- sales_model/inventory.py +144 -0
- sales_model/playbook.py +37 -0
- sales_model/prompt_cache.py +14 -0
- sales_model/prompt_compiler.py +47 -0
- sales_model/prompt_registry.py +102 -0
- sales_model/sales_brain.py +731 -0
- sales_model/schemas.py +57 -0
- sales_model/status_engine.py +258 -0
- sales_model/tactics.py +210 -0
- sales_model-0.1.0.dist-info/METADATA +107 -0
- sales_model-0.1.0.dist-info/RECORD +68 -0
- sales_model-0.1.0.dist-info/WHEEL +4 -0
- sales_model-0.1.0.dist-info/entry_points.txt +2 -0
app/observability/logging.py
ADDED

@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import logging
+import os
+
+import structlog
+
+
+def configure_logging() -> None:
+    level_name = os.getenv("LOG_LEVEL", "INFO").upper()
+    level = logging._nameToLevel.get(level_name, logging.INFO)
+    logging.basicConfig(level=level, format="%(message)s")
+    structlog.configure(
+        processors=[
+            structlog.processors.TimeStamper(fmt="iso"),
+            structlog.processors.JSONRenderer(),
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(level),
+    )
+
+
+def get_logger(name: str = "voice") -> structlog.stdlib.BoundLogger:
+    return structlog.get_logger(name)

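For orientation (illustrative sketch, not part of the package): a caller would run configure_logging() once at process start and then fetch a bound logger wherever structured events are emitted.

    from app.observability.logging import configure_logging, get_logger

    configure_logging()                      # level comes from LOG_LEVEL, default INFO
    log = get_logger("voice")
    log.info("session_started", session_id="abc123")  # rendered as one JSON line
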
app/providers/__init__.py
ADDED

File without changes
app/providers/azure_foundry_stt.py
ADDED

@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import asyncio
+import os
+from typing import AsyncIterator, Optional
+
+import azure.cognitiveservices.speech as speechsdk
+
+from app.providers.stt_base import STTProvider, STTStream, TranscriptEvent
+
+
+def _speech_config() -> speechsdk.SpeechConfig:
+    key = os.getenv("SPEECH_KEY")
+    endpoint = os.getenv("ENDPOINT") or os.getenv("SPEECH_ENDPOINT")
+    region = os.getenv("SPEECH_REGION")
+
+    if not key or not (endpoint or region):
+        raise RuntimeError("Missing SPEECH_KEY and ENDPOINT or SPEECH_REGION for Foundry Speech.")
+
+    if endpoint:
+        config = speechsdk.SpeechConfig(subscription=key, endpoint=endpoint)
+    else:
+        config = speechsdk.SpeechConfig(subscription=key, region=region)
+
+    language = os.getenv("SPEECH_LANGUAGE", "en-US")
+    config.speech_recognition_language = language
+
+    end_silence = os.getenv("SPEECH_END_SILENCE_MS", "600")
+    initial_silence = os.getenv("SPEECH_INITIAL_SILENCE_MS", "4000")
+    stable_partial = os.getenv("SPEECH_STABLE_PARTIAL", "3")
+    segment_silence = os.getenv("SPEECH_SEGMENT_SILENCE_MS")
+
+    try:
+        config.set_property(speechsdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, end_silence)
+        config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, initial_silence)
+        config.set_property(speechsdk.PropertyId.SpeechServiceResponse_StablePartialResultThreshold, stable_partial)
+        if segment_silence:
+            config.set_property(
+                speechsdk.PropertyId.SpeechServiceConnection_SegmentationSilenceTimeoutMs, segment_silence
+            )
+    except Exception:
+        # If a property isn't supported in this SDK version, ignore.
+        pass
+    return config
+
+
+class FoundrySTTStream(STTStream):
+    def __init__(self, speech_config: speechsdk.SpeechConfig, sample_rate: int) -> None:
+        self._loop = asyncio.get_running_loop()
+        self._queue: asyncio.Queue[Optional[TranscriptEvent]] = asyncio.Queue()
+        self._done = asyncio.Event()
+
+        stream_format = speechsdk.audio.AudioStreamFormat(samples_per_second=sample_rate, bits_per_sample=16, channels=1)
+        self._push_stream = speechsdk.audio.PushAudioInputStream(stream_format=stream_format)
+        audio_config = speechsdk.audio.AudioConfig(stream=self._push_stream)
+        self._recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
+
+        self._recognizer.recognizing.connect(self._on_recognizing)
+        self._recognizer.recognized.connect(self._on_recognized)
+        self._recognizer.canceled.connect(self._on_canceled)
+        self._recognizer.session_stopped.connect(self._on_session_stopped)
+
+        self._recognizer.start_continuous_recognition_async()
+
+    def _emit(self, event: Optional[TranscriptEvent]) -> None:
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)
+
+    def _on_recognizing(self, evt: speechsdk.SpeechRecognitionEventArgs) -> None:
+        text = evt.result.text
+        if text:
+            self._emit(TranscriptEvent(text=text, is_final=False))
+
+    def _on_recognized(self, evt: speechsdk.SpeechRecognitionEventArgs) -> None:
+        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            text = evt.result.text
+            if text:
+                self._emit(TranscriptEvent(text=text, is_final=True))
+
+    def _on_canceled(self, _: speechsdk.SpeechRecognitionEventArgs) -> None:
+        self._finish()
+
+    def _on_session_stopped(self, _: speechsdk.SessionEventArgs) -> None:
+        self._finish()
+
+    def _finish(self) -> None:
+        if self._done.is_set():
+            return
+        self._done.set()
+        self._emit(None)
+
+    async def send_audio(self, data: bytes) -> None:
+        if data:
+            self._push_stream.write(data)
+
+    async def end(self) -> None:
+        self._push_stream.close()
+        await asyncio.to_thread(self._recognizer.stop_continuous_recognition_async().get)
+        self._finish()
+
+    async def results(self) -> AsyncIterator[TranscriptEvent]:
+        while True:
+            item = await self._queue.get()
+            if item is None:
+                break
+            yield item
+
+
+class FoundrySTTProvider(STTProvider):
+    async def start_stream(self, sample_rate: int) -> STTStream:
+        speech_config = _speech_config()
+        return FoundrySTTStream(speech_config, sample_rate)

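Usage sketch (illustrative only, not from the package; assumes 16 kHz 16-bit mono PCM input and a hypothetical pcm_chunks iterable). FoundrySTTStream buffers transcript events on an asyncio queue via call_soon_threadsafe, so a caller can push audio, close the stream, and then drain results; in live use you would consume results() concurrently with send_audio().

    import asyncio

    from app.providers.azure_foundry_stt import FoundrySTTProvider

    async def transcribe(pcm_chunks):
        # pcm_chunks: iterable of 16 kHz 16-bit mono PCM byte chunks (hypothetical)
        stream = await FoundrySTTProvider().start_stream(sample_rate=16000)
        for chunk in pcm_chunks:
            await stream.send_audio(chunk)
        await stream.end()  # closes the push stream and stops continuous recognition
        async for event in stream.results():
            print("final" if event.is_final else "partial", event.text)
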
app/providers/azure_foundry_tts.py
ADDED

@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import asyncio
+import os
+from typing import AsyncIterator, Optional
+
+import azure.cognitiveservices.speech as speechsdk
+
+from app.observability.logging import get_logger
+from app.providers.tts_base import TTSProvider
+
+
+def _wrap_with_sales_ssml(text: str, voice: str = "en-US-AriaNeural") -> str:
+    """Wrap text with SSML for optimized sales voice styling."""
+    # Don't double-wrap if already SSML
+    if text.strip().startswith("<speak"):
+        return text
+
+    # Use customerservice style for AriaNeural, friendly for others
+    style = "customerservice" if "Aria" in voice else "friendly"
+    style_degree = "1.2" if "Aria" in voice else "1.0"
+
+    return f'''<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
+<voice name="{voice}" style="{style}" styledegree="{style_degree}">
+{text}
+</voice>
+</speak>'''
+
+
+def _speech_config() -> speechsdk.SpeechConfig:
+    key = os.getenv("SPEECH_KEY")
+    endpoint = os.getenv("ENDPOINT") or os.getenv("SPEECH_ENDPOINT")
+    region = os.getenv("SPEECH_REGION")
+
+    if not key or not (endpoint or region):
+        raise RuntimeError("Missing SPEECH_KEY and ENDPOINT or SPEECH_REGION for Foundry Speech.")
+
+    if endpoint:
+        config = speechsdk.SpeechConfig(subscription=key, endpoint=endpoint)
+    else:
+        config = speechsdk.SpeechConfig(subscription=key, region=region)
+
+    voice = os.getenv("SPEECH_VOICE", "en-US-AriaNeural")
+    config.speech_synthesis_voice_name = voice
+    config.set_speech_synthesis_output_format(
+        speechsdk.SpeechSynthesisOutputFormat.Raw16Khz16BitMonoPcm
+    )
+    return config
+
+
+class FoundryTTSProvider(TTSProvider):
+    async def synthesize_stream(self, text: str, voice: Optional[str] = None) -> AsyncIterator[bytes]:
+        logger = get_logger("tts")
+        speech_config = _speech_config()
+        if voice:
+            speech_config.speech_synthesis_voice_name = voice
+
+        queue: asyncio.Queue[Optional[bytes]] = asyncio.Queue()
+        loop = asyncio.get_running_loop()
+        callback = _StreamingOutputStream(loop, queue)
+        stream = speechsdk.audio.PushAudioOutputStream(callback)
+        audio_config = speechsdk.audio.AudioOutputConfig(stream=stream)
+        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
+
+        # Wrap text with sales-optimized SSML
+        voice_name = voice or speech_config.speech_synthesis_voice_name
+        ssml_text = _wrap_with_sales_ssml(text, voice_name)
+
+        async def _run() -> None:
+            if ssml_text.lstrip().startswith("<speak"):
+                result = await asyncio.to_thread(synthesizer.speak_ssml_async(ssml_text).get)
+            else:
+                result = await asyncio.to_thread(synthesizer.speak_text_async(ssml_text).get)
+            if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
+                if result.reason == speechsdk.ResultReason.Canceled:
+                    details = speechsdk.SpeechSynthesisCancellationDetails.from_result(result)
+                    logger.warning(
+                        "tts_canceled",
+                        reason=str(details.reason),
+                        error_code=str(details.error_code),
+                        error_details=details.error_details,
+                    )
+                else:
+                    logger.warning("tts_failed", reason=str(result.reason))
+                callback.end()
+                return
+            callback.end()
+
+        task = asyncio.create_task(_run())
+
+        sent = 0
+        while True:
+            chunk = await queue.get()
+            if chunk is None:
+                break
+            sent += 1
+            yield chunk
+
+        await task
+        if sent == 0:
+            logger.warning("tts_empty_audio")
+
+
+class _StreamingOutputStream(speechsdk.audio.PushAudioOutputStreamCallback):
+    def __init__(self, loop: asyncio.AbstractEventLoop, queue: asyncio.Queue[Optional[bytes]]) -> None:
+        self._loop = loop
+        self._queue = queue
+        self._closed = False
+
+    def write(self, audio_buffer: memoryview) -> int:
+        data = bytes(audio_buffer)
+        if data:
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, data)
+        return len(audio_buffer)
+
+    def close(self) -> None:
+        self.end()
+
+    def end(self) -> None:
+        if self._closed:
+            return
+        self._closed = True
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, None)

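Because the config requests Raw16Khz16BitMonoPcm output, synthesize_stream yields headerless PCM chunks. A consumer sketch (illustrative, not from the package):

    import asyncio

    from app.providers.azure_foundry_tts import FoundryTTSProvider

    async def speak(text: str) -> bytes:
        audio = bytearray()
        async for chunk in FoundryTTSProvider().synthesize_stream(text):
            audio.extend(chunk)  # raw 16 kHz 16-bit mono PCM, no WAV header
        return bytes(audio)

    # pcm = asyncio.run(speak("Thanks for calling! How can I help today?"))
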
app/providers/llm_base.py
ADDED

@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict
+
+
+@dataclass
+class LLMResult:
+    text: str
+    metadata: Dict[str, str]
+
+
+class LLMProvider:
+    async def generate(self, prompt: str) -> LLMResult:
+        raise NotImplementedError

app/providers/null_stt.py
ADDED

@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import asyncio
+from typing import AsyncIterator
+
+from app.providers.stt_base import STTProvider, STTStream, TranscriptEvent
+
+
+class NullSTTStream(STTStream):
+    def __init__(self) -> None:
+        self._closed = asyncio.Event()
+
+    async def send_audio(self, _: bytes) -> None:
+        return
+
+    async def end(self) -> None:
+        self._closed.set()
+
+    async def results(self) -> AsyncIterator[TranscriptEvent]:
+        await self._closed.wait()
+        if False:
+            yield TranscriptEvent(text="", is_final=True)
+
+
+class NullSTTProvider(STTProvider):
+    async def start_stream(self, sample_rate: int) -> STTStream:
+        _ = sample_rate
+        return NullSTTStream()

app/providers/null_tts.py
ADDED

@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from typing import AsyncIterator, Optional
+
+from app.providers.tts_base import TTSProvider
+
+
+class NullTTSProvider(TTSProvider):
+    async def synthesize_stream(self, text: str, voice: Optional[str] = None) -> AsyncIterator[bytes]:
+        _ = text
+        _ = voice
+        if False:
+            yield b""

app/providers/stt_base.py
ADDED

@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import AsyncIterator, Optional
+
+
+@dataclass
+class TranscriptEvent:
+    text: str
+    is_final: bool
+    confidence: Optional[float] = None
+
+
+class STTStream:
+    async def send_audio(self, _: bytes) -> None:
+        raise NotImplementedError
+
+    async def end(self) -> None:
+        raise NotImplementedError
+
+    async def results(self) -> AsyncIterator[TranscriptEvent]:
+        raise NotImplementedError
+
+
+class STTProvider:
+    async def start_stream(self, sample_rate: int) -> STTStream:
+        raise NotImplementedError

app/sales_brain/__init__.py
ADDED

File without changes
app/sales_brain/brain.py
ADDED

@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from sales_model.sales_brain import SalesBrain
+from sales_model.schemas import BrainOutput
+
+
+def merge_memory(memory: Dict[str, Any], output: BrainOutput) -> Dict[str, Any]:
+    ex = output.extracted.to_dict()
+    for k, v in ex.items():
+        if v in (None, "", []):
+            continue
+        if k not in memory or memory.get(k) in (None, "", []):
+            memory[k] = v
+    if output.meta:
+        memory["_meta"] = output.meta
+    return memory
+
+
+class SalesBrainEngine:
+    def __init__(self) -> None:
+        self._brain = SalesBrain()
+
+    def run_turn(self, state: str, user_message: str, memory: Dict[str, Any]) -> BrainOutput:
+        return self._brain.run_turn(state, user_message, memory=memory)

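merge_memory is fill-only: a value already present in memory is never overwritten by a newly extracted one, and empty extractions are skipped. The policy illustrated with plain dicts (BrainOutput's exact shape lives in sales_model.schemas; these sample values are hypothetical):

    memory = {"budget": "$500", "name": ""}
    extracted = {"budget": "$800", "name": "Dana", "trade_in": None}

    for k, v in extracted.items():
        if v in (None, "", []):
            continue                                   # ignore empty extractions
        if k not in memory or memory.get(k) in (None, "", []):
            memory[k] = v                              # fill only missing/empty slots

    # memory == {"budget": "$500", "name": "Dana"} -- the existing budget is kept
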
app/sales_brain/chunker.py
ADDED

@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import re
+from typing import Iterable, List
+
+
+def split_sentences(text: str) -> List[str]:
+    if not text:
+        return []
+    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s.strip()]
+
+
+def chunk_text(text: str, max_chars: int = 160) -> Iterable[str]:
+    return chunk_text_for_speech(text, max_chars=max_chars, max_sentences=3)
+
+
+def chunk_text_for_speech(text: str, max_chars: int = 160, max_sentences: int = 2) -> Iterable[str]:
+    if not text:
+        return []
+    sentences = split_sentences(text)
+    chunk = ""
+    sentence_count = 0
+    chunks: List[str] = []
+    for sentence in sentences:
+        if not sentence:
+            continue
+        proposed = f"{chunk} {sentence}".strip() if chunk else sentence
+        if (len(proposed) <= max_chars) and (sentence_count < max_sentences):
+            chunk = proposed
+            sentence_count += 1
+            continue
+        if chunk:
+            chunks.append(chunk)
+        if len(sentence) <= max_chars:
+            chunk = sentence
+            sentence_count = 1
+        else:
+            # Hard split long sentences.
+            start = 0
+            while start < len(sentence):
+                end = min(start + max_chars, len(sentence))
+                chunks.append(sentence[start:end].strip())
+                start = end
+            chunk = ""
+            sentence_count = 0
+    if chunk:
+        chunks.append(chunk)
+    return chunks

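chunk_text_for_speech packs whole sentences into chunks bounded by max_chars and max_sentences, hard-splitting any single sentence longer than max_chars. A quick example (sample text is hypothetical):

    from app.sales_brain.chunker import chunk_text_for_speech

    text = "Welcome back. We have three trims in stock. The hybrid ships this week."
    print(list(chunk_text_for_speech(text, max_chars=60, max_sentences=2)))
    # ['Welcome back. We have three trims in stock.',
    #  'The hybrid ships this week.']
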
app/storage/__init__.py
ADDED
File without changes