openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Task models for local engine operations."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
from openspeech.local_engines.models import EngineAction, TaskStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _utc_now() -> datetime:
|
|
13
|
+
return datetime.now(timezone.utc)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class EngineTask:
    """Mutable record of one action performed against a local engine.

    ``engine``, ``action`` and ``runtime`` are required; every other field
    has a default so a freshly created task starts out queued with a random
    hex ``task_id`` and UTC ``started_at`` / ``updated_at`` timestamps.

    :meth:`snapshot` and :meth:`from_snapshot` convert to and from a plain
    dict in which enums are stored by value and datetimes as ISO-8601 strings.
    """

    engine: str
    action: EngineAction
    runtime: str
    task_id: str = field(default_factory=lambda: uuid4().hex)
    status: TaskStatus = TaskStatus.QUEUED
    phase: str = "queued"
    message: str = "Task queued."
    progress: float | None = 0.0
    eta_seconds: int | None = None
    error: str | None = None
    started_at: datetime = field(default_factory=_utc_now)
    updated_at: datetime = field(default_factory=_utc_now)
    finished_at: datetime | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def snapshot(self) -> dict[str, Any]:
        """Return the task as a plain dict of primitive values.

        Enum fields are emitted as their ``.value`` and datetimes as
        ``isoformat()`` strings (``finished_at`` is ``None`` while unset).
        ``metadata`` is shallow-copied so that editing the snapshot does not
        silently modify the live task.
        """
        return {
            "task_id": self.task_id,
            "engine": self.engine,
            "action": self.action.value,
            "runtime": self.runtime,
            "status": self.status.value,
            "phase": self.phase,
            "message": self.message,
            "progress": self.progress,
            "eta_seconds": self.eta_seconds,
            "error": self.error,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "finished_at": self.finished_at.isoformat() if self.finished_at else None,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_snapshot(cls, data: dict[str, Any]) -> "EngineTask":
        """Rebuild a task from a dict shaped like :meth:`snapshot` output.

        Required keys: ``engine``, ``action``, ``runtime``, ``task_id``,
        ``status``, ``started_at`` and ``updated_at``. The remaining keys are
        optional and fall back to ``"queued"`` (phase), ``""`` (message),
        ``None`` (progress, eta_seconds, error, finished_at) and an empty
        dict (metadata).

        Raises:
            KeyError: a required key is missing.
            ValueError: an enum value or timestamp string cannot be parsed.
        """
        task = cls(
            engine=data["engine"],
            action=EngineAction(data["action"]),
            runtime=data["runtime"],
            task_id=data["task_id"],
            status=TaskStatus(data["status"]),
            phase=data.get("phase", "queued"),
            message=data.get("message", ""),
            progress=data.get("progress"),
            eta_seconds=data.get("eta_seconds"),
            error=data.get("error"),
            # Copy so the task never shares (and later mutates) the caller's
            # dict; a stored ``None`` is normalised to an empty dict.
            metadata=dict(data.get("metadata") or {}),
        )
        task.started_at = datetime.fromisoformat(data["started_at"])
        task.updated_at = datetime.fromisoformat(data["updated_at"])
        finished_raw = data.get("finished_at")
        if finished_raw:
            task.finished_at = datetime.fromisoformat(finished_raw)
        return task
|
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
"""Unified logging configuration for OpenSpeechAPI.
|
|
2
|
+
|
|
3
|
+
Responsibilities:
|
|
4
|
+
- Configure loguru sinks (console + rotating JSONL file) with a consistent
|
|
5
|
+
field contract so logs can be parsed by humans, LLMs, and log pipelines.
|
|
6
|
+
- Provide contextvars-bound fields (``request_id``, ``session_id``,
|
|
7
|
+
``provider``, ``engine``) that automatically decorate every log record
|
|
8
|
+
emitted under a given async request / task tree.
|
|
9
|
+
- Expose a simple public API (:func:`configure_logging`, :func:`bind_context`,
|
|
10
|
+
:func:`get_log_settings`) that CLI, server app, client and tests can share.
|
|
11
|
+
|
|
12
|
+
Field contract (see ``docs/architecture/logging-spec.md`` for full details)::
|
|
13
|
+
|
|
14
|
+
ts, level, event, module, message,
|
|
15
|
+
request_id, session_id, provider, engine,
|
|
16
|
+
phase, elapsed_ms, ttfb_ms, payload
|
|
17
|
+
|
|
18
|
+
``event`` is a dotted namespace (e.g. ``ws.first_response``,
|
|
19
|
+
``dispatch.dispatch_total``) and is always set for structured milestone
|
|
20
|
+
records emitted via :mod:`openspeech.telemetry.perf`. Legacy free-form
|
|
21
|
+
``logger.info("...")`` calls simply leave ``event`` empty.
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
import sys
|
|
28
|
+
from contextlib import contextmanager
|
|
29
|
+
from contextvars import ContextVar
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any, Callable, Iterator
|
|
33
|
+
|
|
34
|
+
from loguru import logger as _root_logger
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Host-integration state
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
#
|
|
40
|
+
# OpenSpeech can run in one of two modes:
|
|
41
|
+
#
|
|
42
|
+
# * **standalone** (default) — our own console + JSONL file sinks, managed
|
|
43
|
+
# by :func:`configure_logging`.
|
|
44
|
+
# * **host-managed** — another loguru-based application (e.g. wallex)
|
|
45
|
+
# wants to own the sinks, and we just emit records with a known tag.
|
|
46
|
+
#
|
|
47
|
+
# The tag is attached to every record emitted through :data:`logger`
|
|
48
|
+
# (the re-exported loguru instance every openspeech module imports). The
|
|
49
|
+
# host can read ``record["extra"]["component"]`` in its own sink to
|
|
50
|
+
# format, filter, or route OpenSpeech records.
|
|
51
|
+
|
|
52
|
+
# Mutable host-integration state. integrate_with_host() rewrites all three
# keys; the patcher, the filters and configure_logging() read them at call time.
_INTEGRATION: dict[str, Any] = {
    "tag": "openspeech",  # record.extra["component"] value
    "level": None,  # optional independent min-level for OS records
    "host_managed": False,  # True → ensure_configured() is a no-op
}

# Track sink IDs we own so we can remove them without touching host sinks.
_OS_SINK_IDS: list[int] = []
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _os_patcher(record: dict[str, Any]) -> None:
    """Tag a log record with the OpenSpeech component name unless already tagged.

    The tag is looked up in :data:`_INTEGRATION` each time a record is
    emitted, so a later :func:`integrate_with_host` call takes effect without
    rebinding the logger object that openspeech modules already hold. An
    existing ``component`` entry in ``record["extra"]`` is left untouched.
    """
    extras = record["extra"]
    if "component" not in extras:
        extras["component"] = _INTEGRATION["tag"]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Single logger instance shared by the whole openspeech package. All
# internal modules must import it as ``from openspeech.logging_config
# import logger`` so the component tag is guaranteed: records emitted through
# this patched instance pass through ``_os_patcher`` first.
logger = _root_logger.patch(_os_patcher)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# Public field contract
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
# Well-known ``extra`` keys. ``_json_serialize`` lifts these to the top level
# of the JSON object (when present and not None); any other ``extra`` key is
# kept under ``payload``.
_CONTEXT_FIELDS: tuple[str, ...] = (
    "request_id",
    "session_id",
    "provider",
    "engine",
    "event",
    "phase",
    "elapsed_ms",
    "ttfb_ms",
)

# Valid perf levels, in order of verbosity.
_PERF_LEVELS: tuple[str, ...] = ("off", "basic", "verbose")

# Valid log formats.
_LOG_FORMATS: tuple[str, ...] = ("text", "json")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# Settings
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class LogSettings:
    """Resolved logging configuration.

    Built by :func:`configure_logging` and :func:`integrate_with_host`; the
    most recently built instance is what :func:`get_log_settings` returns.
    """

    # Minimum level name handed to the sinks this module attaches.
    level: str = "INFO"
    format: str = "text"  # "text" for console-friendly, "json" for structured stderr
    log_dir: Path | None = None  # when set, also write rotating JSONL
    # File name created inside ``log_dir``.
    log_file_name: str = "openspeech.jsonl"
    # Passed through to loguru's file sink as ``rotation`` / ``retention``.
    rotation: str = "50 MB"
    retention: str = "14 days"
    perf: str = "basic"  # off | basic | verbose
    # None -> auto. NOTE(review): stored but not read anywhere in this module.
    color: bool | None = None
    # Free-form extras; nothing in this module populates or reads it.
    extra: dict[str, Any] = field(default_factory=dict)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Settings currently in effect. Rebound (never mutated field-by-field) by
# configure_logging() and integrate_with_host(); starts out as plain defaults.
_CURRENT: LogSettings = LogSettings()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_log_settings() -> LogSettings:
    """Return the module-level :class:`LogSettings` that is currently active."""
    active = _CURRENT
    return active
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Context vars (for request_id / session_id / provider / engine propagation)
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
# One ContextVar per propagated field. bind_context() sets and resets them;
# all default to None, meaning "nothing bound in this context".
_request_id_var: ContextVar[str | None] = ContextVar("os_request_id", default=None)
_session_id_var: ContextVar[str | None] = ContextVar("os_session_id", default=None)
_provider_var: ContextVar[str | None] = ContextVar("os_provider", default=None)
_engine_var: ContextVar[str | None] = ContextVar("os_engine", default=None)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def get_request_id() -> str | None:
    """Return the ``request_id`` bound in the current context, or ``None``."""
    current = _request_id_var.get()
    return current
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def get_session_id() -> str | None:
    """Return the ``session_id`` bound in the current context, or ``None``."""
    current = _session_id_var.get()
    return current
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@contextmanager
def bind_context(
    *,
    request_id: str | None = None,
    session_id: str | None = None,
    provider: str | None = None,
    engine: str | None = None,
    **extra: Any,
) -> Iterator[dict[str, Any]]:
    """Attach contextual fields to every log record inside a ``with`` block.

    Every argument is optional and ``None`` means "leave any outer binding
    alone". The four named fields are stored both in their ``ContextVar``
    (readable via e.g. :func:`get_request_id`) and in loguru's contextualized
    ``extra``; additional keyword arguments are only added to loguru's
    ``extra``.

    Yields:
        The dict of fields bound by *this* call (values inherited from an
        enclosing ``bind_context`` are not included).
    """
    known_fields = (
        ("request_id", _request_id_var, request_id),
        ("session_id", _session_id_var, session_id),
        ("provider", _provider_var, provider),
        ("engine", _engine_var, engine),
    )

    bindings: dict[str, Any] = {}
    pending_resets: list[tuple[ContextVar[Any], Any]] = []
    for field_name, ctx_var, value in known_fields:
        if value is None:
            continue
        pending_resets.append((ctx_var, ctx_var.set(value)))
        bindings[field_name] = value

    bindings.update({key: val for key, val in extra.items() if val is not None})

    try:
        with logger.contextualize(**bindings):
            yield bindings
    finally:
        # Restore the context vars newest-first. A failed reset is ignored on
        # purpose: cleanup of logging context must never raise into the caller.
        while pending_resets:
            ctx_var, token = pending_resets.pop()
            try:
                ctx_var.reset(token)
            except Exception:
                pass
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
# Formatters
|
|
201
|
+
# ---------------------------------------------------------------------------
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _text_format(record: dict[str, Any]) -> str:
|
|
205
|
+
"""Human-friendly single-line format with contextual fields appended."""
|
|
206
|
+
extra = record.get("extra", {}) or {}
|
|
207
|
+
ctx_parts = []
|
|
208
|
+
rid = extra.get("request_id")
|
|
209
|
+
if rid:
|
|
210
|
+
ctx_parts.append(f"rid={rid}")
|
|
211
|
+
prov = extra.get("provider")
|
|
212
|
+
if prov:
|
|
213
|
+
ctx_parts.append(f"prov={prov}")
|
|
214
|
+
event = extra.get("event")
|
|
215
|
+
if event:
|
|
216
|
+
ctx_parts.append(f"event={event}")
|
|
217
|
+
elapsed = extra.get("elapsed_ms")
|
|
218
|
+
if elapsed is not None:
|
|
219
|
+
try:
|
|
220
|
+
ctx_parts.append(f"elapsed_ms={float(elapsed):.2f}")
|
|
221
|
+
except Exception:
|
|
222
|
+
ctx_parts.append(f"elapsed_ms={elapsed}")
|
|
223
|
+
ctx = f" [{' '.join(ctx_parts)}]" if ctx_parts else ""
|
|
224
|
+
|
|
225
|
+
# Note: loguru sink receives the already-formatted {message}. The format
|
|
226
|
+
# string is parsed by loguru, so we must escape curly braces in user data
|
|
227
|
+
# elsewhere — but here we return a plain template string.
|
|
228
|
+
level = record["level"].name
|
|
229
|
+
time_str = record["time"].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
230
|
+
name = record["name"]
|
|
231
|
+
msg = record["message"]
|
|
232
|
+
return f"{time_str} | {level:<7} | {name}{ctx} | {msg}\n"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _json_serialize(record: dict[str, Any]) -> str:
    """Turn a loguru record into a single newline-terminated JSON line.

    The object always carries ``ts``, ``level``, ``module`` and ``message``.
    Keys listed in ``_CONTEXT_FIELDS`` are lifted from ``extra`` to the top
    level when present and not ``None``; an ``exception`` object (type name
    and string value) is added when the record has one; whatever is left in
    ``extra`` is kept under ``payload``. Values JSON cannot encode natively
    are stringified. If encoding still fails, a minimal ERROR line describing
    the failure is returned instead, so logging never raises.
    """
    leftovers = dict(record.get("extra") or {})

    top: dict[str, Any] = {
        "ts": record["time"].isoformat(),
        "level": record["level"].name,
        "module": record["name"],
        "message": record["message"],
    }

    # Promote the well-known context fields out of ``extra``.
    for key in _CONTEXT_FIELDS:
        if leftovers.get(key) is not None:
            top[key] = leftovers.pop(key)

    exc = record.get("exception")
    if exc is not None:
        exc_type = exc.type.__name__ if exc.type else None
        exc_value = str(exc.value) if exc.value else None
        top["exception"] = {"type": exc_type, "value": exc_value}

    if leftovers:
        # Unknown extras are preserved so nothing is silently lost.
        top["payload"] = leftovers

    try:
        encoded = json.dumps(top, ensure_ascii=False, default=str)
    except Exception as e:
        # Fallback — never let logging crash the app.
        failure = {
            "ts": top["ts"],
            "level": "ERROR",
            "module": "openspeech.logging_config",
            "message": f"log serialize failed: {e!r}",
        }
        encoded = json.dumps(failure, ensure_ascii=False)
    return encoded + "\n"
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _make_console_sink(fmt: str) -> Any:
|
|
275
|
+
"""Return a loguru sink callable that writes to stderr."""
|
|
276
|
+
if fmt == "json":
|
|
277
|
+
def _sink(message):
|
|
278
|
+
sys.stderr.write(_json_serialize(message.record))
|
|
279
|
+
return _sink
|
|
280
|
+
# text
|
|
281
|
+
def _sink(message):
|
|
282
|
+
sys.stderr.write(_text_format(message.record))
|
|
283
|
+
return _sink
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _make_file_sink_path(log_dir: Path, log_file_name: str) -> Path:
|
|
287
|
+
log_dir.mkdir(parents=True, exist_ok=True)
|
|
288
|
+
return log_dir / log_file_name
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _attach_jsonl_file_sink(
    *,
    file_path: Path,
    level: str,
    rotation: str,
    retention: str,
) -> None:
    """Attach a loguru file sink that writes one JSON object per line.

    loguru's built-in file sink is used so rotation, retention and ``enqueue``
    keep working. The JSON text is produced by ``_json_serialize``, stashed
    under ``extra["_jsonl"]`` and rendered through the ``{extra[_jsonl]}``
    placeholder, so loguru never tries to interpret the braces of the JSON
    itself as template fields.

    The format is supplied as a *callable* rather than a string. For string
    formats loguru appends ``"\\n{exception}"`` automatically, which would
    write raw traceback lines after the JSON line of any record carrying an
    exception and break the one-object-per-line contract. With a callable the
    returned template is used as-is, so the line ending is added here and the
    exception is represented only by the ``exception`` field in the JSON.

    Args:
        file_path: Destination file; passed to loguru as a string path.
        level: Minimum level name for this sink.
        rotation: loguru rotation policy (e.g. ``"50 MB"``).
        retention: loguru retention policy (e.g. ``"14 days"``).

    The new sink id is recorded in ``_OS_SINK_IDS`` so it can be removed later.
    """

    def _jsonl_format(record):
        # Serialize per record, then hand loguru a template that only
        # dereferences the stashed string.
        record["extra"]["_jsonl"] = _json_serialize(record).rstrip("\n")
        return "{extra[_jsonl]}\n"

    sink_id = _root_logger.add(
        str(file_path),
        level=level,
        rotation=rotation,
        retention=retention,
        enqueue=True,  # don't block the event loop on disk IO
        backtrace=False,
        diagnose=False,
        serialize=False,
        format=_jsonl_format,
    )
    _OS_SINK_IDS.append(sink_id)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
# Public API
|
|
327
|
+
# ---------------------------------------------------------------------------
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _level_from_env(default: str) -> str:
|
|
331
|
+
return (os.environ.get("OPENSPEECH_LOG_LEVEL") or default).upper()
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _format_from_env(default: str) -> str:
    """Return the lower-cased ``OPENSPEECH_LOG_FORMAT`` if it is a known format.

    An unset, empty or unrecognised value yields *default* (a recognised
    default is returned lower-cased, an unrecognised one unchanged).
    """
    candidate = (os.environ.get("OPENSPEECH_LOG_FORMAT") or default).lower()
    if candidate in _LOG_FORMATS:
        return candidate
    return default
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _perf_from_env(default: str) -> str:
    """Return the lower-cased ``OPENSPEECH_LOG_PERF`` if it is a known perf level.

    An unset, empty or unrecognised value yields *default* (a recognised
    default is returned lower-cased, an unrecognised one unchanged).
    """
    candidate = (os.environ.get("OPENSPEECH_LOG_PERF") or default).lower()
    if candidate in _PERF_LEVELS:
        return candidate
    return default
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _log_dir_from_env(default: Path | None) -> Path | None:
|
|
345
|
+
raw = os.environ.get("OPENSPEECH_LOG_DIR")
|
|
346
|
+
if raw:
|
|
347
|
+
return Path(raw).expanduser()
|
|
348
|
+
return default
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def configure_logging(
    *,
    level: str | None = None,
    format: str | None = None,
    log_dir: str | Path | None = None,
    log_file_name: str | None = None,
    perf: str | None = None,
    color: bool | None = None,
    rotation: str | None = None,
    retention: str | None = None,
    default_log_dir: str | Path | None = "logs",
) -> LogSettings:
    """Configure OpenSpeech's loguru sinks and record the resolved settings.

    Safe to call repeatedly: sinks previously added by this module are removed
    before new ones are attached, and host-owned sinks are never touched.

    Precedence (highest wins): explicit kwarg → env var → default. The
    environment variables consulted are ``OPENSPEECH_LOG_LEVEL``,
    ``OPENSPEECH_LOG_FORMAT``, ``OPENSPEECH_LOG_PERF`` and
    ``OPENSPEECH_LOG_DIR``. An unknown format falls back to ``"text"`` and an
    unknown perf level to ``"basic"``.

    Args:
        level: Minimum level name for the sinks (upper-cased).
        format: Console format, ``"text"`` or ``"json"``.
        log_dir: Directory for the rotating JSONL file. ``None`` defers to the
            env var and then ``default_log_dir``; ``""`` disables file output.
        log_file_name: JSONL file name (default ``"openspeech.jsonl"``).
        perf: Perf verbosity, ``"off"``, ``"basic"`` or ``"verbose"``.
        color: Stored on the settings; not otherwise used by this function.
        rotation: loguru rotation policy (default ``"50 MB"``).
        retention: loguru retention policy (default ``"14 days"``).
        default_log_dir: Fallback directory when neither ``log_dir`` nor the
            env var is given; ``None`` or ``""`` means no file output by default.

    Returns:
        The resolved :class:`LogSettings`, which also becomes the value
        returned by :func:`get_log_settings`.
    """
    global _CURRENT

    resolved_level = (level or _level_from_env("INFO")).upper()
    resolved_format = (format or _format_from_env("text")).lower()
    resolved_perf = (perf or _perf_from_env("basic")).lower()
    if resolved_format not in _LOG_FORMATS:
        resolved_format = "text"
    if resolved_perf not in _PERF_LEVELS:
        resolved_perf = "basic"

    # Resolve log_dir: explicit arg > env > default. Empty string disables.
    if log_dir is None:
        raw_dir = _log_dir_from_env(
            Path(default_log_dir).expanduser() if default_log_dir else None
        )
    elif log_dir == "":
        raw_dir = None
    else:
        raw_dir = Path(log_dir).expanduser()

    settings = LogSettings(
        level=resolved_level,
        format=resolved_format,
        log_dir=raw_dir,
        log_file_name=log_file_name or "openspeech.jsonl",
        rotation=rotation or "50 MB",
        retention=retention or "14 days",
        perf=resolved_perf,
        color=color,
    )

    # Remove any sinks we previously owned (do NOT touch host sinks).
    _remove_os_sinks()

    if not _INTEGRATION["host_managed"]:
        # We own the output pipeline: strip loguru's default handler (id=0)
        # so we don't double-log to stderr, then attach our console sink.
        try:
            _root_logger.remove(0)
        except ValueError:
            pass  # already removed
        sink_id = _root_logger.add(
            _make_console_sink(settings.format),
            level=settings.level,
            enqueue=False,
            backtrace=False,
            diagnose=False,
        )
        _OS_SINK_IDS.append(sink_id)

    # File sink (structured JSONL, rotated). Failure here is reported on
    # stderr but must not prevent the application from starting.
    if settings.log_dir is not None:
        try:
            file_path = _make_file_sink_path(settings.log_dir, settings.log_file_name)
            _attach_jsonl_file_sink(
                file_path=file_path,
                level=settings.level,
                rotation=settings.rotation,
                retention=settings.retention,
            )
        except Exception as e:
            sys.stderr.write(f"[openspeech] failed to attach file log sink: {e}\n")

    # Mark the settings as applied. ensure_configured() checks this marker;
    # without it, a later ensure_configured() call would re-run this function
    # with defaults and discard the configuration chosen here.
    object.__setattr__(settings, "_applied", True)

    # Update module-level current settings.
    _CURRENT = settings
    return settings
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _remove_os_sinks() -> None:
    """Detach every sink recorded in ``_OS_SINK_IDS``, newest first.

    Only sinks added by OpenSpeech are affected; host sinks are left alone.
    The id list is emptied as it is processed, and ids loguru no longer knows
    about are skipped.
    """
    while True:
        try:
            sink_id = _OS_SINK_IDS.pop()
        except IndexError:
            return  # nothing left to remove
        try:
            _root_logger.remove(sink_id)
        except ValueError:
            continue  # already removed
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def ensure_configured() -> LogSettings:
    """Apply the default logging configuration once, if nothing applied it yet.

    Returns the current settings unchanged when a host application owns the
    sinks (``host_managed`` set by :func:`integrate_with_host`) or when the
    current settings already carry the ``_applied`` marker. Otherwise
    :func:`configure_logging` is run with defaults, the result is marked as
    applied, and it is returned.
    """
    if _INTEGRATION["host_managed"] or getattr(_CURRENT, "_applied", False):
        return _CURRENT

    applied = configure_logging()
    object.__setattr__(applied, "_applied", True)
    return applied
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ---------------------------------------------------------------------------
|
|
471
|
+
# Host integration (let another loguru-based app own the sinks)
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def integrate_with_host(
    *,
    tag: str = "openspeech",
    level: str | None = None,
    perf: str | None = None,
    attach_sinks: bool = False,
) -> LogSettings:
    """Hand sink management over to a host application using loguru.

    The host keeps its own sinks; OpenSpeech only guarantees that every
    record it emits carries ``record.extra["component"] == tag`` so the
    host can format, filter, or route those records.

    Parameters
    ----------
    tag
        Value written into ``record.extra["component"]`` for every
        OpenSpeech log. Defaults to ``"openspeech"``.
    level
        Minimum level intended *only for OpenSpeech records*. It is stored
        (upper-cased) in the integration state and in the returned settings;
        this module does not apply it to host sinks automatically — the host
        enforces it by adding :func:`openspeech_level_filter` to its sink.
        ``None`` keeps whatever the host sink decides.
    perf
        Performance milestone verbosity (``off`` / ``basic`` / ``verbose``)
        — independent of the host's log level. ``None`` keeps the current
        value.
    attach_sinks
        When ``False`` (default) OpenSpeech adds no sinks of its own and
        drops any it had added; the host is expected to have registered at
        least one. Set to ``True`` to keep the built-in stderr + JSONL sinks
        *in addition* to the host's sinks (rarely useful; provided as an
        escape hatch).

    Returns
    -------
    LogSettings
        The resolved settings; also stored on the module for
        :func:`get_log_settings` to return.
    """
    global _CURRENT

    resolved_level = level.upper() if level else None

    _INTEGRATION["tag"] = tag
    _INTEGRATION["level"] = resolved_level
    _INTEGRATION["host_managed"] = not attach_sinks

    _CURRENT = LogSettings(
        level=(resolved_level if resolved_level else _CURRENT.level),
        format=_CURRENT.format,
        log_dir=_CURRENT.log_dir,
        log_file_name=_CURRENT.log_file_name,
        rotation=_CURRENT.rotation,
        retention=_CURRENT.retention,
        perf=(perf.lower() if perf else _CURRENT.perf),
        color=_CURRENT.color,
    )
    # Mark as applied so ensure_configured() does not reconfigure later.
    object.__setattr__(_CURRENT, "_applied", True)

    if not attach_sinks:
        # Host is fully in control — drop any sinks we own.
        _remove_os_sinks()
        return _CURRENT

    # Keep our sinks *in addition to* host sinks. ``host_managed`` is already
    # False here, so the normal configure path attaches them. Every current
    # setting is forwarded; otherwise the file location, rotation, retention
    # and color carried over above would silently revert to env/defaults.
    configure_logging(
        level=_CURRENT.level,
        format=_CURRENT.format,
        log_dir=_CURRENT.log_dir,
        log_file_name=_CURRENT.log_file_name,
        perf=_CURRENT.perf,
        color=_CURRENT.color,
        rotation=_CURRENT.rotation,
        retention=_CURRENT.retention,
    )
    # configure_logging() rebinds _CURRENT to a fresh object; mark that one as
    # applied too so ensure_configured() leaves this configuration in place.
    object.__setattr__(_CURRENT, "_applied", True)
    return _CURRENT
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def openspeech_filter(record: dict[str, Any]) -> bool:
    """Loguru ``filter`` that accepts only records tagged as OpenSpeech.

    A record matches when ``record["extra"]["component"]`` equals the
    currently configured integration tag.

    Usage (host side)::

        logger.add("openspeech.log", filter=openspeech_filter)
    """
    extras = record.get("extra", {})
    return extras.get("component") == _INTEGRATION["tag"]
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def openspeech_level_filter(min_level: str) -> Callable[[dict[str, Any]], bool]:
    """Build a sink filter that applies ``min_level`` to OpenSpeech records only.

    Records whose ``component`` extra differs from the current integration
    tag always pass, so the filter can sit on a sink shared by host and
    OpenSpeech logs while giving OpenSpeech its own threshold.

    The level name is resolved to its numeric severity once, when the filter
    is built; if loguru cannot resolve it, the threshold falls back to 20
    (INFO).

    Usage (host side)::

        logger.add("wallex.log", filter=openspeech_level_filter("WARNING"))
    """
    level_name = min_level.upper()
    try:
        threshold_no = _root_logger.level(level_name).no
    except Exception:
        threshold_no = 20  # INFO fallback

    def _filter(record: dict[str, Any]) -> bool:
        is_ours = record.get("extra", {}).get("component") == _INTEGRATION["tag"]
        if is_ours:
            return record["level"].no >= threshold_no
        return True  # host records bypass

    return _filter
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def get_integration_tag() -> str:
    """Return the component tag currently stamped on OpenSpeech records."""
    tag = _INTEGRATION["tag"]
    return tag
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def is_host_managed() -> bool:
    """Return ``True`` when a host application owns sink management."""
    flag = _INTEGRATION["host_managed"]
    return bool(flag)
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
# Public API of this module (kept in alphabetical order).
__all__ = [
    "LogSettings",
    "bind_context",
    "configure_logging",
    "ensure_configured",
    "get_integration_tag",
    "get_log_settings",
    "get_request_id",
    "get_session_id",
    "integrate_with_host",
    "is_host_managed",
    "logger",
    "openspeech_filter",
    "openspeech_level_filter",
]
|
|
File without changes
|