openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,71 @@
1
+ """Task models for local engine operations."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ from openspeech.local_engines.models import EngineAction, TaskStatus
10
+
11
+
12
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
14
+
15
+
16
@dataclass
class EngineTask:
    """Mutable record describing one operation performed on a local engine.

    The three leading fields are required; everything else has a default so
    a freshly created task starts out queued with ``progress == 0.0`` and
    ``started_at`` / ``updated_at`` stamped by ``_utc_now``.

    ``snapshot()`` and ``from_snapshot()`` convert to and from a plain dict:
    enum members are stored by ``.value`` and datetimes as ISO-8601 strings.
    """

    engine: str
    action: EngineAction
    runtime: str
    task_id: str = field(default_factory=lambda: uuid4().hex)
    status: TaskStatus = TaskStatus.QUEUED
    phase: str = "queued"
    message: str = "Task queued."
    progress: float | None = 0.0
    eta_seconds: int | None = None
    error: str | None = None
    started_at: datetime = field(default_factory=_utc_now)
    updated_at: datetime = field(default_factory=_utc_now)
    finished_at: datetime | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def snapshot(self) -> dict[str, Any]:
        """Return the task as a plain dict.

        ``metadata`` is shallow-copied so that later mutation of the task
        does not silently change a snapshot that was already handed out
        (and vice versa). Nested values inside ``metadata`` are still shared.
        """
        return {
            "task_id": self.task_id,
            "engine": self.engine,
            "action": self.action.value,
            "runtime": self.runtime,
            "status": self.status.value,
            "phase": self.phase,
            "message": self.message,
            "progress": self.progress,
            "eta_seconds": self.eta_seconds,
            "error": self.error,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "finished_at": self.finished_at.isoformat() if self.finished_at else None,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_snapshot(cls, data: dict[str, Any]) -> "EngineTask":
        """Rebuild a task from a dict produced by :meth:`snapshot`.

        Required keys: ``engine``, ``action``, ``runtime``, ``task_id``,
        ``status``, ``started_at``, ``updated_at`` (``KeyError`` if absent).
        The remaining keys are optional. ``ValueError`` propagates from the
        enum constructors or ``datetime.fromisoformat`` on malformed values.
        """
        finished_raw = data.get("finished_at")
        return cls(
            engine=data["engine"],
            action=EngineAction(data["action"]),
            runtime=data["runtime"],
            task_id=data["task_id"],
            status=TaskStatus(data["status"]),
            phase=data.get("phase", "queued"),
            message=data.get("message", ""),
            progress=data.get("progress"),
            eta_seconds=data.get("eta_seconds"),
            error=data.get("error"),
            # Copy so the task never aliases the caller's dict; ``or {}``
            # also covers an explicit ``"metadata": None`` entry.
            metadata=dict(data.get("metadata") or {}),
            started_at=datetime.fromisoformat(data["started_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
            finished_at=datetime.fromisoformat(finished_raw) if finished_raw else None,
        )
@@ -0,0 +1,607 @@
1
+ """Unified logging configuration for OpenSpeechAPI.
2
+
3
+ Responsibilities:
4
+ - Configure loguru sinks (console + rotating JSONL file) with a consistent
5
+ field contract so logs can be parsed by humans, LLMs, and log pipelines.
6
+ - Provide contextvars-bound fields (``request_id``, ``session_id``,
7
+ ``provider``, ``engine``) that automatically decorate every log record
8
+ emitted under a given async request / task tree.
9
+ - Expose a simple public API (:func:`configure_logging`, :func:`bind_context`,
10
+ :func:`get_log_settings`) that CLI, server app, client and tests can share.
11
+
12
+ Field contract (see ``docs/architecture/logging-spec.md`` for full details)::
13
+
14
+ ts, level, event, module, message,
15
+ request_id, session_id, provider, engine,
16
+ phase, elapsed_ms, ttfb_ms, payload
17
+
18
+ ``event`` is a dotted namespace (e.g. ``ws.first_response``,
19
+ ``dispatch.dispatch_total``) and is always set for structured milestone
20
+ records emitted via :mod:`openspeech.telemetry.perf`. Legacy free-form
21
+ ``logger.info("...")`` calls simply leave ``event`` empty.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import os
27
+ import sys
28
+ from contextlib import contextmanager
29
+ from contextvars import ContextVar
30
+ from dataclasses import dataclass, field
31
+ from pathlib import Path
32
+ from typing import Any, Callable, Iterator
33
+
34
+ from loguru import logger as _root_logger
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Host-integration state
38
+ # ---------------------------------------------------------------------------
39
+ #
40
+ # OpenSpeech can run in one of two modes:
41
+ #
42
+ # * **standalone** (default) — our own console + JSONL file sinks, managed
43
+ # by :func:`configure_logging`.
44
+ # * **host-managed** — another loguru-based application (e.g. wallex)
45
+ # wants to own the sinks, and we just emit records with a known tag.
46
+ #
47
+ # The tag is attached to every record emitted through :data:`logger`
48
+ # (the re-exported loguru instance every openspeech module imports). The
49
+ # host can read ``record["extra"]["component"]`` in its own sink to
50
+ # format, filter, or route OpenSpeech records.
51
+
52
# Mutable module-level integration state. ``tag`` is read at emit time by
# ``_os_patcher`` and by the sink filters below; all three keys are rewritten
# by ``integrate_with_host``. ``level`` is only stored here — nothing in this
# module reads it back.
_INTEGRATION: dict[str, Any] = {
    "tag": "openspeech",  # record.extra["component"] value
    "level": None,  # optional independent min-level for OS records
    "host_managed": False,  # True → ensure_configured() is a no-op
}

# Track sink IDs we own so we can remove them without touching host sinks.
# Appended to whenever this module calls ``_root_logger.add`` and drained by
# ``_remove_os_sinks``.
_OS_SINK_IDS: list[int] = []
60
+
61
+
62
def _os_patcher(record: dict[str, Any]) -> None:
    """Tag a log record with the OpenSpeech component name if it has none.

    The tag is looked up in :data:`_INTEGRATION` each time a record is
    emitted, so :func:`integrate_with_host` can change it at runtime without
    every openspeech module having to re-import ``logger``. A ``component``
    value that is already present in ``extra`` is left untouched.
    """
    extras = record["extra"]
    tag = _INTEGRATION["tag"]
    if "component" not in extras:
        extras["component"] = tag
70
+
71
+
72
# Single logger instance shared by the whole openspeech package. All
# internal modules must import it as ``from openspeech.logging_config
# import logger`` so the component tag is guaranteed: ``_os_patcher`` is
# attached to this instance only, so records logged through a bare
# ``loguru.logger`` are not stamped by it.
logger = _root_logger.patch(_os_patcher)
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Public field contract
80
+ # ---------------------------------------------------------------------------
81
+
82
# Well-known ``extra`` keys. ``_json_serialize`` promotes each of these to
# the top level of the JSON record when it is present and not ``None``;
# every other ``extra`` key ends up under ``payload``.
_CONTEXT_FIELDS: tuple[str, ...] = (
    "request_id",
    "session_id",
    "provider",
    "engine",
    "event",
    "phase",
    "elapsed_ms",
    "ttfb_ms",
)

# Valid perf levels, in order of verbosity.
_PERF_LEVELS: tuple[str, ...] = ("off", "basic", "verbose")

# Valid log formats.
_LOG_FORMATS: tuple[str, ...] = ("text", "json")
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Settings
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
@dataclass
class LogSettings:
    """Resolved logging configuration.

    Attributes:
        level: Minimum level name handed to the sinks.
        format: ``"text"`` for console-friendly lines, ``"json"`` for
            structured stdout.
        log_dir: When set, a rotating JSONL file is also written there.
        log_file_name: File name used inside ``log_dir``.
        rotation: Rotation policy passed to the file sink.
        retention: Retention policy passed to the file sink.
        perf: Perf milestone verbosity: ``off`` | ``basic`` | ``verbose``.
        color: ``None`` means auto.
        extra: Free-form additional settings.
    """

    level: str = "INFO"
    format: str = "text"
    log_dir: Path | None = None
    log_file_name: str = "openspeech.jsonl"
    rotation: str = "50 MB"
    retention: str = "14 days"
    perf: str = "basic"
    color: bool | None = None
    extra: dict[str, Any] = field(default_factory=dict)
119
+
120
+
121
# Module-level record of the active settings. Starts as an all-defaults
# LogSettings and is rebound by configure_logging() and integrate_with_host().
_CURRENT: LogSettings = LogSettings()
122
+
123
+
124
def get_log_settings() -> LogSettings:
    """Return the currently active log settings.

    This is the module-level ``_CURRENT`` object itself, not a copy.
    """
    return _CURRENT
127
+
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Context vars (for request_id / session_id / provider / engine propagation)
131
+ # ---------------------------------------------------------------------------
132
+
133
# One ContextVar per propagated field, each defaulting to None. They are set
# and reset by ``bind_context``; only request_id and session_id have getter
# functions in this module.
_request_id_var: ContextVar[str | None] = ContextVar("os_request_id", default=None)
_session_id_var: ContextVar[str | None] = ContextVar("os_session_id", default=None)
_provider_var: ContextVar[str | None] = ContextVar("os_provider", default=None)
_engine_var: ContextVar[str | None] = ContextVar("os_engine", default=None)
137
+
138
+
139
def get_request_id() -> str | None:
    """Return the request_id currently bound in this async context.

    Returns ``None`` when no enclosing :func:`bind_context` set one.
    """
    return _request_id_var.get()
142
+
143
+
144
def get_session_id() -> str | None:
    """Return the session_id currently bound in this context, or ``None``."""
    return _session_id_var.get()
146
+
147
+
148
@contextmanager
def bind_context(
    *,
    request_id: str | None = None,
    session_id: str | None = None,
    provider: str | None = None,
    engine: str | None = None,
    **extra: Any,
) -> Iterator[dict[str, Any]]:
    """Attach contextual fields to every log record inside a ``with`` block.

    Every argument is optional, and anything passed as ``None`` is skipped,
    so an outer binding of the same field stays in effect. The four named
    fields are stored both in their ``ContextVar`` (readable through e.g.
    :func:`get_request_id`) and in ``logger.contextualize()``; additional
    keyword arguments go to ``logger.contextualize()`` only.

    Yields:
        The dict of fields that were actually bound.
    """
    known = (
        ("request_id", _request_id_var, request_id),
        ("session_id", _session_id_var, session_id),
        ("provider", _provider_var, provider),
        ("engine", _engine_var, engine),
    )
    bindings: dict[str, Any] = {}
    undo: list[tuple[ContextVar[Any], Any]] = []

    for field_name, ctx_var, value in known:
        if value is None:
            continue
        undo.append((ctx_var, ctx_var.set(value)))
        bindings[field_name] = value
    bindings.update({k: v for k, v in extra.items() if v is not None})

    try:
        with logger.contextualize(**bindings):
            yield bindings
    finally:
        # Restore in reverse order of binding. Resetting is best-effort: a
        # failure here must not mask whatever happened inside the block.
        while undo:
            ctx_var, token = undo.pop()
            try:
                ctx_var.reset(token)
            except Exception:
                pass
197
+
198
+
199
+ # ---------------------------------------------------------------------------
200
+ # Formatters
201
+ # ---------------------------------------------------------------------------
202
+
203
+
204
def _text_format(record: dict[str, Any]) -> str:
    """Render a record as one human-readable line ending in a newline.

    Layout: ``<time> | <level> | <name>[ <context>] | <message>``. The
    bracketed context lists ``rid``, ``prov`` and ``event`` when the matching
    ``extra`` value is truthy, followed by ``elapsed_ms`` (two decimals when
    it converts to ``float``, verbatim otherwise) when it is not ``None``.

    The returned string is written to the stream as-is by the console sink,
    so braces in the message need no escaping here.
    """
    extras = record.get("extra", {}) or {}
    tokens = [
        f"{label}={extras.get(key)}"
        for label, key in (
            ("rid", "request_id"),
            ("prov", "provider"),
            ("event", "event"),
        )
        if extras.get(key)
    ]

    elapsed = extras.get("elapsed_ms")
    if elapsed is not None:
        try:
            shown = f"{float(elapsed):.2f}"
        except Exception:
            shown = f"{elapsed}"
        tokens.append(f"elapsed_ms={shown}")

    suffix = f" [{' '.join(tokens)}]" if tokens else ""
    # %f yields microseconds; dropping the last three digits leaves milliseconds.
    stamp = record["time"].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
    return (
        f"{stamp} | {record['level'].name:<7} | "
        f"{record['name']}{suffix} | {record['message']}\n"
    )
233
+
234
+
235
def _json_serialize(record: dict[str, Any]) -> str:
    """Turn a loguru record into a single line of JSON (newline-terminated).

    The object always carries ``ts``, ``level``, ``module`` and ``message``.
    Keys listed in ``_CONTEXT_FIELDS`` are lifted out of ``extra`` to the top
    level when they hold a non-``None`` value; an ``exception`` entry with
    ``type`` / ``value`` is added when the record has one; whatever is left
    in ``extra`` is kept under ``payload`` so nothing is silently dropped.
    Values ``json`` cannot encode natively are stringified.
    """
    extras = dict(record.get("extra") or {})
    doc: dict[str, Any] = {
        "ts": record["time"].isoformat(),
        "level": record["level"].name,
        "module": record["name"],
        "message": record["message"],
    }

    for name in _CONTEXT_FIELDS:
        if extras.get(name) is not None:
            doc[name] = extras.pop(name)

    failure = record.get("exception")
    if failure is not None:
        exc_type = failure.type.__name__ if failure.type else None
        exc_value = str(failure.value) if failure.value else None
        doc["exception"] = {"type": exc_type, "value": exc_value}

    if extras:
        doc["payload"] = extras

    try:
        line = json.dumps(doc, ensure_ascii=False, default=str)
    except Exception as e:
        # Never let logging crash the app: emit a minimal error record instead.
        fallback = {
            "ts": doc["ts"],
            "level": "ERROR",
            "module": "openspeech.logging_config",
            "message": f"log serialize failed: {e!r}",
        }
        line = json.dumps(fallback, ensure_ascii=False)
    return line + "\n"
272
+
273
+
274
def _make_console_sink(fmt: str) -> Any:
    """Build a loguru sink callable that writes rendered records to stderr.

    ``fmt == "json"`` selects :func:`_json_serialize`; any other value
    selects :func:`_text_format`.
    """
    render = _json_serialize if fmt == "json" else _text_format

    def _sink(message):
        sys.stderr.write(render(message.record))

    return _sink
284
+
285
+
286
def _make_file_sink_path(log_dir: Path, log_file_name: str) -> Path:
    """Create ``log_dir`` (and parents) if needed and return the log file path.

    Only the directory is created; the file itself is not touched.
    """
    log_dir.mkdir(exist_ok=True, parents=True)
    target = log_dir.joinpath(log_file_name)
    return target
289
+
290
+
291
def _attach_jsonl_file_sink(
    *,
    file_path: Path,
    level: str,
    rotation: str,
    retention: str,
) -> None:
    """Attach a loguru file sink that writes one JSON object per line.

    loguru's built-in file sink is used so rotation / retention / enqueue
    keep working. The record is serialized by :func:`_json_serialize`, the
    result is stashed under ``extra["_jsonl"]`` and rendered through the
    template ``{extra[_jsonl]}``, which keeps loguru's format engine from
    interpreting the braces of the JSON text itself.

    The formatter is a *callable* on purpose: when ``format`` is a plain
    string loguru appends ``"\\n{exception}"`` to it, so any record carrying
    an exception would get a raw traceback written after its JSON line and
    break the one-object-per-line contract. With a callable, exactly the
    returned template is used (the exception is already captured inside the
    JSON by ``_json_serialize``).

    The new sink id is recorded in ``_OS_SINK_IDS`` so it can be removed
    later without touching host sinks.
    """

    def _format_jsonl(record: dict[str, Any]) -> str:
        record["extra"]["_jsonl"] = _json_serialize(record).rstrip("\n")
        return "{extra[_jsonl]}\n"

    sink_id = _root_logger.add(
        str(file_path),
        level=level,
        rotation=rotation,
        retention=retention,
        enqueue=True,  # don't block the event loop on disk IO
        backtrace=False,
        diagnose=False,
        serialize=False,
        format=_format_jsonl,
    )
    _OS_SINK_IDS.append(sink_id)
323
+
324
+
325
+ # ---------------------------------------------------------------------------
326
+ # Public API
327
+ # ---------------------------------------------------------------------------
328
+
329
+
330
def _level_from_env(default: str) -> str:
    """Return ``OPENSPEECH_LOG_LEVEL`` upper-cased, or ``default`` upper-cased.

    An unset or empty variable falls back to ``default``.
    """
    raw = os.environ.get("OPENSPEECH_LOG_LEVEL")
    chosen = raw if raw else default
    return chosen.upper()
332
+
333
+
334
def _format_from_env(default: str) -> str:
    """Return the lower-cased ``OPENSPEECH_LOG_FORMAT`` if it is a known format.

    When the variable is unset or empty, ``default`` (lower-cased) is the
    candidate. If the candidate is not in ``_LOG_FORMATS``, ``default`` is
    returned exactly as passed in.
    """
    candidate = (os.environ.get("OPENSPEECH_LOG_FORMAT") or default).lower()
    if candidate not in _LOG_FORMATS:
        return default
    return candidate
337
+
338
+
339
def _perf_from_env(default: str) -> str:
    """Return the lower-cased ``OPENSPEECH_LOG_PERF`` if it is a known perf level.

    When the variable is unset or empty, ``default`` (lower-cased) is the
    candidate. If the candidate is not in ``_PERF_LEVELS``, ``default`` is
    returned exactly as passed in.
    """
    candidate = (os.environ.get("OPENSPEECH_LOG_PERF") or default).lower()
    if candidate not in _PERF_LEVELS:
        return default
    return candidate
342
+
343
+
344
def _log_dir_from_env(default: Path | None) -> Path | None:
    """Return ``OPENSPEECH_LOG_DIR`` as a user-expanded ``Path``, else ``default``.

    An unset or empty variable yields ``default`` unchanged.
    """
    configured = os.environ.get("OPENSPEECH_LOG_DIR")
    return Path(configured).expanduser() if configured else default
349
+
350
+
351
def configure_logging(
    *,
    level: str | None = None,
    format: str | None = None,
    log_dir: str | Path | None = None,
    log_file_name: str | None = None,
    perf: str | None = None,
    color: bool | None = None,
    rotation: str | None = None,
    retention: str | None = None,
    default_log_dir: str | Path | None = "logs",
) -> LogSettings:
    """Configure loguru globally and return the resolved settings.

    Safe to call repeatedly: sinks previously added by this module are
    removed before new ones are attached, and host sinks are left alone.

    Precedence (highest wins): explicit kwarg → env var → default.
    Unknown ``format`` / ``perf`` values fall back to ``"text"`` /
    ``"basic"``.

    ``default_log_dir`` controls where the JSONL file is written when no
    explicit ``log_dir`` is provided. Pass ``None`` to disable file output
    by default. Pass ``log_dir=""`` (empty string) from the CLI to disable
    it explicitly.

    A failure to attach the file sink is reported on stderr and otherwise
    ignored, so logging setup never prevents the application from starting.

    The returned settings are marked as applied so that a later
    :func:`ensure_configured` does not re-run this function with defaults
    and discard the caller's explicit configuration.
    """
    global _CURRENT

    resolved_level = (level or _level_from_env("INFO")).upper()
    resolved_format = (format or _format_from_env("text")).lower()
    resolved_perf = (perf or _perf_from_env("basic")).lower()

    # Resolve log_dir: explicit arg > env > default. Empty string disables.
    if log_dir is None:
        raw_dir = _log_dir_from_env(
            Path(default_log_dir).expanduser() if default_log_dir else None
        )
    elif log_dir == "":
        raw_dir = None
    else:
        raw_dir = Path(log_dir).expanduser()

    if resolved_format not in _LOG_FORMATS:
        resolved_format = "text"
    if resolved_perf not in _PERF_LEVELS:
        resolved_perf = "basic"

    settings = LogSettings(
        level=resolved_level,
        format=resolved_format,
        log_dir=raw_dir,
        log_file_name=log_file_name or "openspeech.jsonl",
        rotation=rotation or "50 MB",
        retention=retention or "14 days",
        perf=resolved_perf,
        color=color,
    )

    # Remove any sinks we previously owned (do NOT touch host sinks).
    _remove_os_sinks()

    if not _INTEGRATION["host_managed"]:
        # We own the output pipeline: strip loguru's default handler (id=0)
        # so we don't double-log to stderr, then add our console sink.
        try:
            _root_logger.remove(0)
        except ValueError:
            pass  # already removed

        sink_id = _root_logger.add(
            _make_console_sink(settings.format),
            level=settings.level,
            enqueue=False,
            backtrace=False,
            diagnose=False,
        )
        _OS_SINK_IDS.append(sink_id)

    # File sink (structured JSONL, rotated)
    if settings.log_dir is not None:
        try:
            file_path = _make_file_sink_path(settings.log_dir, settings.log_file_name)
            _attach_jsonl_file_sink(
                file_path=file_path,
                level=settings.level,
                rotation=settings.rotation,
                retention=settings.retention,
            )
        except Exception as e:
            sys.stderr.write(f"[openspeech] failed to attach file log sink: {e}\n")

    # Same marker ensure_configured() checks; without it an explicit
    # configuration would be overwritten by the next ensure_configured().
    object.__setattr__(settings, "_applied", True)

    # Update module-level current settings.
    _CURRENT = settings
    return settings
442
+
443
+
444
def _remove_os_sinks() -> None:
    """Detach every sink this module registered; host sinks are not touched.

    Ids are taken from the end of ``_OS_SINK_IDS`` until it is empty. An id
    loguru no longer knows about (``ValueError``) is simply skipped.
    """
    while _OS_SINK_IDS:
        stale_id = _OS_SINK_IDS.pop()
        try:
            _root_logger.remove(stale_id)
        except ValueError:
            continue  # already removed
452
+
453
+
454
def ensure_configured() -> LogSettings:
    """Apply the default logging configuration unless one is already active.

    Returns the current settings untouched when a host application owns the
    sinks (``host_managed=True`` via :func:`integrate_with_host`) or when
    the current settings already carry the ``_applied`` marker. Otherwise
    :func:`configure_logging` is run with defaults and its result is marked
    as applied and returned.
    """
    if _INTEGRATION["host_managed"] or getattr(_CURRENT, "_applied", False):
        return _CURRENT

    fresh = configure_logging()
    object.__setattr__(fresh, "_applied", True)
    return fresh
468
+
469
+
470
+ # ---------------------------------------------------------------------------
471
+ # Host integration (let another loguru-based app own the sinks)
472
+ # ---------------------------------------------------------------------------
473
+
474
+
475
def integrate_with_host(
    *,
    tag: str = "openspeech",
    level: str | None = None,
    perf: str | None = None,
    attach_sinks: bool = False,
) -> LogSettings:
    """Hand sink management over to a host application using loguru.

    The host keeps its own sinks; OpenSpeech only guarantees that every
    record it emits carries ``record.extra["component"] == tag`` so the
    host can format, filter, or route those records.

    Parameters
    ----------
    tag
        Value written into ``record.extra["component"]`` for every
        OpenSpeech log. Defaults to ``"openspeech"``.
    level
        Minimum level intended *only for OpenSpeech records*. It is stored
        (upper-cased) in the integration state and the settings; the host
        enforces it by installing :func:`openspeech_level_filter` on its
        sink. ``None`` keeps whatever the host sink decides.
    perf
        Performance milestone verbosity (``off`` / ``basic`` / ``verbose``)
        — independent of the host's log level. An unrecognised value falls
        back to ``"basic"``, matching :func:`configure_logging`.
    attach_sinks
        When ``False`` (default) OpenSpeech adds no sinks of its own and
        drops any it had; the host is expected to have already registered
        at least one. Set to ``True`` to keep the built-in stderr + JSONL
        sinks *in addition* to the host's sinks (rarely useful; provided as
        an escape hatch).

    Returns
    -------
    LogSettings
        The resolved settings; also stored on the module for
        :func:`get_log_settings` to return.
    """
    global _CURRENT

    resolved_level = level.upper() if level else None
    resolved_perf = perf.lower() if perf else _CURRENT.perf
    if resolved_perf not in _PERF_LEVELS:
        resolved_perf = "basic"

    _INTEGRATION["tag"] = tag
    _INTEGRATION["level"] = resolved_level
    _INTEGRATION["host_managed"] = not attach_sinks

    _CURRENT = LogSettings(
        level=resolved_level or _CURRENT.level,
        format=_CURRENT.format,
        log_dir=_CURRENT.log_dir,
        log_file_name=_CURRENT.log_file_name,
        rotation=_CURRENT.rotation,
        retention=_CURRENT.retention,
        perf=resolved_perf,
        color=_CURRENT.color,
    )
    # Mark as applied so ensure_configured() will not reconfigure later.
    object.__setattr__(_CURRENT, "_applied", True)

    if attach_sinks:
        # Keep our sinks *in addition to* host sinks. host_managed is
        # already False here, so the normal configure path adds them.
        # NOTE(review): only level / format / perf are forwarded; log_dir,
        # file name, rotation and retention are re-resolved from env and
        # defaults by configure_logging() — confirm that is intended.
        configure_logging(
            level=_CURRENT.level,
            format=_CURRENT.format,
            perf=_CURRENT.perf,
        )
        # configure_logging() rebinds _CURRENT to a brand-new LogSettings,
        # so the marker set above has to be re-applied to the new object.
        object.__setattr__(_CURRENT, "_applied", True)
    else:
        # Host is fully in control — drop any sinks we own.
        _remove_os_sinks()

    return _CURRENT
545
+
546
+
547
def openspeech_filter(record: dict[str, Any]) -> bool:
    """Loguru ``filter`` accepting only records tagged as OpenSpeech.

    A record matches when ``extra["component"]`` equals the tag currently
    stored in the integration state.

    Usage (host side)::

        logger.add("openspeech.log", filter=openspeech_filter)
    """
    extras = record.get("extra", {})
    component = extras.get("component")
    return component == _INTEGRATION["tag"]
555
+
556
+
557
def openspeech_level_filter(min_level: str) -> Callable[[dict[str, Any]], bool]:
    """Build a sink filter applying ``min_level`` to OpenSpeech records only.

    Records whose ``extra["component"]`` differs from the current tag pass
    through unconditionally, so the filter can sit on a shared host sink
    that should use a different threshold for OpenSpeech output.

    The numeric threshold is looked up once, when the filter is built; if
    loguru cannot resolve the level name, 20 (INFO) is used.

    Usage (host side)::

        logger.add("wallex.log", filter=openspeech_level_filter("WARNING"))
    """
    wanted = min_level.upper()
    try:
        floor = _root_logger.level(wanted).no
    except Exception:
        floor = 20  # INFO fallback

    def _filter(record: dict[str, Any]) -> bool:
        is_ours = record.get("extra", {}).get("component") == _INTEGRATION["tag"]
        # Host records bypass the threshold entirely.
        return (not is_ours) or record["level"].no >= floor

    return _filter
581
+
582
+
583
def get_integration_tag() -> str:
    """Return the current component tag used for OpenSpeech records.

    This is the value ``_os_patcher`` writes into ``extra["component"]``.
    """
    return _INTEGRATION["tag"]
586
+
587
+
588
def is_host_managed() -> bool:
    """Return True if a host application has taken over sink management.

    Reflects the ``host_managed`` flag set by :func:`integrate_with_host`.
    """
    return bool(_INTEGRATION["host_managed"])
591
+
592
+
593
# Names exported by ``from openspeech.logging_config import *``; the
# underscore-prefixed helpers and module state are intentionally left out.
__all__ = [
    "LogSettings",
    "bind_context",
    "configure_logging",
    "ensure_configured",
    "get_integration_tag",
    "get_log_settings",
    "get_request_id",
    "get_session_id",
    "integrate_with_host",
    "is_host_managed",
    "logger",
    "openspeech_filter",
    "openspeech_level_filter",
]
File without changes